// ==UserScript==
// @id CNKI_PDF_Supernova
// @name 知网PDF下载助手
// @version 3.2.1
// @namespace https://github.com/supernovaZhangJiaXing/Tampermonkey/
// @author Supernova
// @description 直接以PDF格式下载知网上的文献, 包括期刊论文和博硕士论文
// @include http*://*.cnki.net/*
// @include http*://*.cnki.net.*/*
// @include */DefaultResult/Index*
// @include */KNS8/AdvSearch*
// @include */detail.aspx*
// @include */CatalogViewPage.aspx*
// @include */Article/*
// @include */kns/brief/*
// @include */kns55/brief/*
// @include */grid2008/brief/*
// @include */detail/detail.aspx*
// @exclude http://image.cnki.net/*
// @require https://unpkg.com/pdf-lib@1.13.0/dist/pdf-lib.js
// @require https://cdn.jsdelivr.net/gh/tabedit/code-segment@f730e9d9573c4ca7e758766e6a8fb282faae38af/outline.esm.js
// @run-at document-idle
// @grant unsafeWindow
// @grant GM_setClipboard
// @grant GM_xmlhttpRequest
// @downloadURL none
// ==/UserScript==
'use strict';
// Use the jQuery instance loaded by the page itself, reached through unsafeWindow.
var $ = unsafeWindow.jQuery;
// NOTE(review): `contents` and `pdf` are not read anywhere in the visible part of this file
// (get_content and manage_contents declare their own local `contents`) - confirm before removing.
var contents = '';
var pdf;
// Page bootstrap. On non-detail pages whose URL contains "kns8" it adds a PDF/CAJ toggle for
// the download links; on detail pages whose query string matches the dbcode pattern below it
// rearranges the download buttons and requests the chapter list for the outline helpers.
$(document).ready(function() {
var myurl = window.location.href;
var isDetailPage = myurl.indexOf("detail.aspx") != -1 ? true: false; // detail page shown after opening a document
var isContentPage = myurl.indexOf("kdoc/download.aspx?") != -1 ? true : false; // chapter-by-chapter download page; NOTE(review): not read in the visible code
if (isDetailPage === false) {
if (window.location.href.indexOf("kns8") != -1){ // search-result page only, so the toggle does not appear elsewhere
// Runs after every successful jQuery AJAX request. arguments[2] is read as the request
// settings (jQuery passes event, jqXHR, ajaxOptions to ajaxSuccess handlers).
$(document).ajaxSuccess(function() {
// indexOf(...) + 1 is truthy exactly when the substring is present.
if (arguments[2].url.indexOf('/Brief/GetGridTableHtml') + 1) {
// Avoid a label/link mismatch: always reset everything to PDF by default
$('.downloadlink').attr('href', toPDF);
convert_box.innerText = "当前默认下载格式: PDF (点击转换)";
}
});
// Clickable label that shows the current default format and switches it on click.
var convert_box = document.createElement("div");
convert_box.innerText = "当前默认下载格式: PDF (点击转换)";
convert_box.className = "ecp_tn-tab";
convert_box.style = "color: blue; font-weight: bolder";
convert_box.onclick = function() {
// The label text doubles as the state: compare it to decide which way to switch.
if (convert_box.innerText == "当前默认下载格式: PDF (点击转换)") {
convert_box.innerText = "当前默认下载格式: CAJ (点击转换)";
$('.downloadlink').attr('href', toCAJ);
} else if (convert_box.innerText == "当前默认下载格式: CAJ (点击转换)") {
convert_box.innerText = "当前默认下载格式: PDF (点击转换)";
$('.downloadlink').attr('href', toPDF);
}
};
var mycnki = $('#icnki');
// NOTE(review): mycnki is a jQuery object, so `mycnki.nextElementSibling` (a DOM element
// property) is presumably undefined here, which would make insertBefore append the label at
// the end of the parent - confirm the intended position. This line also throws if #icnki is
// absent, because parent()[0] would then be undefined.
mycnki.parent()[0].insertBefore(convert_box, mycnki.nextElementSibling);
}
}
// On a detail page, theses show five buttons: mobile, whole book, by page, by chapter, online
// Journal articles show three buttons
else {
// Only thesis detail pages are adjusted; otherwise the journal page layout would be affected
// The new site layout changed the detail page, so a thesis page is detected by a CMFD-like dbcode value
if (location.search.match(/dbcode=C[DM][FM]D&/i)) {
// Relabel "whole book download" as "CAJ download"
$(".btn-dlcaj").first().html($(".btn-dlcaj").first().html().replace("整本", "CAJ"));
// URL of the PDF file
// NOTE(review): this throws if the removed .btn-dlpdf element has no link with an href.
// pdf_url is not referenced again in the code as it appears here - the markup literal just
// below looks truncated and may originally have embedded it; verify against the published script.
var pdf_url = $(".btn-dlpdf").remove().find("a").attr("href").replace("&dflag=downpage", "&dflag=pdfdown");
// Add the PDF download button
var pdf_down = $('
PDF下载');
$(".btn-dlcaj").first().after(pdf_down);
// Take the outline URL from the chapter-download button
// The '?' fallback keeps the match() below from returning null when the link has no href.
var content_url = $(".btn-dlcaj:eq(1)").find("a").attr("href") || '?';
content_url = 'https://chn.oversea.cnki.net/kcms/download.aspx' + content_url.match(/\?.*/)[0];
// Fetch the chapter list; manage_contents receives the response as the onload callback.
GM_xmlhttpRequest({method: 'GET', url: content_url, onload: manage_contents});
// Add usage notes on the right-hand side
$(".operate-btn").append($('脚本说明'));
// Add a tool download at the bottom right (PdgContentEditor)
// NOTE(review): cnt_util_for_mac is assigned but never read in the visible code; the click
// handler below calls gen_py_for_FUCKING_mac() again instead.
var cnt_util_for_mac = gen_py_for_FUCKING_mac();
// NOTE(review): $('') is given an empty string as it appears here, which yields an empty
// jQuery set - the original markup for this entry is presumably missing; confirm.
$(".opts-down").append($('').click(function(){
var data = new Blob([gen_py_for_FUCKING_mac()],{type:"text/plain; charset=UTF-8"});
$(this).find('a').last().attr("download", 'pdf_utils.py');
$(this).find('a').last().attr("href", window.URL.createObjectURL(data));
// NOTE(review): revokeObjectURL expects the URL string returned by createObjectURL; here it
// is given the Blob, so the object URL created above is presumably never released.
window.URL.revokeObjectURL(data);
}))
}
}
});
// Returns the source text of a standalone Python helper script (offered to the user as
// pdf_utils.py) that reads a "title<TAB>page" outline file and writes the entries into a PDF
// as bookmarks using PyPDF2.
//
// The script is kept as an array with one entry per Python line so that its indentation is
// visible and cannot be lost. Two escaping rules matter:
//  - "\\n" and "\\t" are used wherever the Python code needs the escape sequence inside one
//    of its own string literals. A bare "\n" there would put a real line break in the middle
//    of a single-quoted Python string, which is a Python SyntaxError.
//  - the plain "\t" inside the docstring is intentional: it only illustrates the file format.
//
// Takes no parameters. Returns the whole script as one string terminated by a blank line.
function gen_py_for_FUCKING_mac() {
    var py_lines = [
        "# coding:utf-8",
        "# 往pdf文件中添加书签",
        "# 来源: https://www.jianshu.com/p/1aac3ae4d620",
        "# 执行前需要安装库 PyPDF2: pip install PyPDF2",
        "import os",
        "from PyPDF2 import PdfFileReader as reader, PdfFileWriter as writer",
        "",
        "",
        "class PDFHandleMode(object):",
        "    '''",
        "    处理PDF文件的模式",
        "    '''",
        "    # 保留源PDF文件的所有内容和信息,在此基础上修改",
        "    COPY = 'copy'",
        "    # 仅保留源PDF文件的页面内容,在此基础上修改",
        "    NEWLY = 'newly'",
        "",
        "",
        "def add_outline(file_name, content_file_name):",
        "    '''",
        "    给PDF文件挂载书签",
        "    :param file_name: PDF文件路径",
        "    :param content_file_name: 目录文件路径",
        "    '''",
        "    pdf_handler = MyPDFHandler(file_name, mode=PDFHandleMode.NEWLY)",
        "    pdf_handler.add_bookmarks_by_read_txt(content_file_name)",
        "    pdf_handler.save2file(file_name.split('.')[0] + u'-目录书签版.pdf')",
        "",
        "",
        "def read_bookmarks_from_txt(txt_file_path, page_offset=0):",
        "    '''",
        "    从文本文件中读取书签列表",
        "    文本文件有若干行,每行一个书签,内容格式为:",
        "    书签标题\t页码",
        "    注:中间用空格隔开,页码为1表示第1页",
        "    :param txt_file_path: 书签信息文本文件路径",
        "    :param page_offset: 页码便宜量,为0或正数,即由于封面、目录等页面的存在,在PDF中实际的绝对页码比在目录中写的页码多出的差值",
        "    :return: 书签列表",
        "    '''",
        "    bookmarks = []",
        "    with open(txt_file_path, 'r', encoding='utf-8') as fin:",
        "        for line in fin:",
        "            line = line.rstrip()",
        "            if not line:",
        "                continue",
        "            # 以'@'作为标题、页码分隔符",
        "            print('read line is: {0}'.format(line))",
        "            try:",
        "                title = line.split('\\t')[0].rstrip()",
        "                page = line.split('\\t')[1].strip()",
        "            except IndexError as msg:",
        "                print(msg)",
        "                continue",
        "            # title和page都不为空才添加书签,否则不添加",
        "            if title and page:",
        "                try:",
        "                    page = int(page) + page_offset",
        "                    bookmarks.append((title, page))",
        "                except ValueError as msg:",
        "                    print(msg)",
        "",
        "    return bookmarks",
        "",
        "",
        "class MyPDFHandler(object):",
        "    '''",
        "    封装的PDF文件处理类",
        "    '''",
        "",
        "    def __init__(self, pdf_file_path, mode=PDFHandleMode.COPY):",
        "        '''",
        "        用一个PDF文件初始化",
        "        :param pdf_file_path: PDF文件路径",
        "        :param mode: 处理PDF文件的模式,默认为PDFHandleMode.COPY模式",
        "        '''",
        "        # 只读的PDF对象",
        "        self.__pdf = reader(pdf_file_path, strict=False)",
        "",
        "        # 获取PDF文件名(不带路径)",
        "        self.file_name = os.path.basename(pdf_file_path)",
        "        #",
        "        self.metadata = self.__pdf.getXmpMetadata()",
        "        #",
        "        self.doc_info = self.__pdf.getDocumentInfo()",
        "        #",
        "        self.pages_num = self.__pdf.getNumPages()",
        "",
        "        # 可写的PDF对象,根据不同的模式进行初始化",
        "        self.__writeable_pdf = writer()",
        "        if mode == PDFHandleMode.COPY:",
        "            self.__writeable_pdf.cloneDocumentFromReader(self.__pdf)",
        "        elif mode == PDFHandleMode.NEWLY:",
        "            for idx in range(self.pages_num):",
        "                page = self.__pdf.getPage(idx)",
        "                self.__writeable_pdf.insertPage(page, idx)",
        "",
        "    def save2file(self, new_file_name):",
        "        '''",
        "        将修改后的PDF保存成文件",
        "        :param new_file_name: 新文件名,不要和原文件名相同",
        "        :return: None",
        "        '''",
        "        # 保存修改后的PDF文件内容到文件中",
        "        with open(new_file_name, 'wb') as fout:",
        "            self.__writeable_pdf.write(fout)",
        "        print('save2file success! new file is: {0}'.format(new_file_name))",
        "",
        "    def add_one_bookmark(self, title, page, parent=None, color=None, fit='/Fit'):",
        "        '''",
        "        往PDF文件中添加单条书签,并且保存为一个新的PDF文件",
        "        :param str title: 书签标题",
        "        :param int page: 书签跳转到的页码,表示的是PDF中的绝对页码,值为1表示第一页",
        "        :paran parent: A reference to a parent bookmark to create nested bookmarks.",
        "        :param tuple color: Color of the bookmark as a red, green, blue tuple from 0.0 to 1.0",
        "        :param list bookmarks: 是一个'(书签标题,页码)'二元组列表,举例:[(u'tag1',1),(u'tag2',5)],页码为1代表第一页",
        "        :param str fit: 跳转到书签页后的缩放方式",
        "        :return: None",
        "        '''",
        "        # 为了防止乱码,这里对title进行utf-8编码",
        "        # Supernova: 好像不需要, 先不编码",
        "        self.__writeable_pdf.addBookmark(title, page - 1, parent=parent, color=color, fit=fit)",
        "        print('add_one_bookmark success! bookmark title is: {0}'.format(title))",
        "",
        "    def add_bookmarks(self, bookmarks):",
        "        '''",
        "        批量添加书签",
        "        :param bookmarks: 书签元组列表,其中的页码表示的是PDF中的绝对页码,值为1表示第一页",
        "        :return: None",
        "        '''",
        "        for title, page in bookmarks:",
        "            self.add_one_bookmark(title, page)",
        "        print('add_bookmarks success! add {0} pieces of bookmarks to PDF file'.format(len(bookmarks)))",
        "",
        "    def add_bookmarks_by_read_txt(self, txt_file_path, page_offset=0):",
        "        '''",
        "        通过读取书签列表信息文本文件,将书签批量添加到PDF文件中",
        "        :param txt_file_path: 书签列表信息文本文件",
        "        :param page_offset: 页码便宜量,为0或正数,即由于封面、目录等页面的存在,在PDF中实际的绝对页码比在目录中写的页码多出的差值",
        "        :return: None",
        "        '''",
        "        bookmarks = read_bookmarks_from_txt(txt_file_path, page_offset)",
        "        self.add_bookmarks(bookmarks)",
        "        print('add_bookmarks_by_read_txt success!')",
        "",
        "",
        "def main(bookname, outline):",
        "    # 读取PDF文件,创建PdfFileReader对象",
        "    book = reader(bookname, strict=False)",
        "",
        "    # 创建PdfFileWriter对象,并用拷贝reader对象进行初始化",
        "    pdf = writer()",
        "    pdf.cloneDocumentFromReader(book)",
        "",
        "    # 添加书签",
        "    # 注意:页数是从0开始的,中文要用unicode字符串,否则会出现乱码",
        "    # 如果这里的页码超过文档的最大页数,会报IndexError异常",
        "    pdf.addBookmark(outline, 0)",
        "",
        "    # 保存修改后的PDF文件内容到文件中",
        "    # 注意:这里必须用二进制的'wb'模式来写文件,否则写到文件中的内容都为乱码",
        "    with open('book-with-bookmark.pdf', 'wb') as fout:",
        "        pdf.write(fout)",
        "",
        "",
        "if __name__ == '__main__':",
        "    pdf = input('请输入待添加书签的PDF文件的全路径或将其拖至此处: \\n')",
        "    outline = input('请输入待添加的目录索引文件的全路径或将其拖至此处: \\n')",
        "    add_outline(pdf.rstrip(), outline.rstrip())",
        "    input('添加完成, 按任意键退出')",
        "",
        ""
    ];
    // The two trailing empty entries reproduce the newline after the last statement plus the
    // final blank line of the script.
    return py_lines.join("\n");
}
// href callback for jQuery .attr(): `this` is the link element being updated.
// Produces the link's URL with its dflag parameter set to "nhdown" (appended when the URL has
// no dflag yet), remembers it under the element's jQuery data key "CAJ", and returns it.
function toCAJ() {
    var link = $(this);
    var caj_href = this.href.replace(/&dflag=\w*|$/, '&dflag=nhdown');
    link.data('CAJ', caj_href);
    return link.data("CAJ");
}
// href callback for jQuery .attr(): `this` is the link element being updated.
// Produces the link's URL with its dflag parameter set to "pdfdown" (appended when the URL has
// no dflag yet), remembers it under the element's jQuery data key "PDF", and returns it.
function toPDF() {
    var link = $(this);
    var pdf_href = this.href.replace(/&dflag=\w*|$/, '&dflag=pdfdown');
    link.data('PDF', pdf_href);
    return link.data("PDF");
}
// Builds the outline text from a list of table rows: one "title<TAB>page<CR><LF>" line per row.
//
// cnt_list: array-like of row nodes. For each row the title markup is read from
//           childNodes[1].childNodes[1].innerHTML and the page text from
//           childNodes[3].childNodes[0].textContent.
// Returns the concatenated lines, or "" when there is nothing to read.
//
// Title handling: the markup is trimmed, non-breaking spaces are turned into plain spaces, and
// every run of four spaces becomes one tab. innerHTML serialises a non-breaking space as the
// entity "&nbsp;", so the entity form has to be matched as well as the raw U+00A0 character;
// matching only a space character never normalises it.
// Rows that do not have the expected node structure are skipped instead of aborting the whole
// outline with a TypeError.
function get_content(cnt_list){
    var outline = "";
    // The last entry is deliberately left out: per the original author's note it is a text
    // node rather than a row.
    for (var i = 0; i < cnt_list.length - 1; i++) {
        var cells = cnt_list[i] && cnt_list[i].childNodes;
        var title_node = cells && cells[1] && cells[1].childNodes && cells[1].childNodes[1];
        var page_node = cells && cells[3] && cells[3].childNodes && cells[3].childNodes[0];
        if (!title_node || typeof title_node.innerHTML !== "string") {
            continue;
        }
        if (!page_node || page_node.textContent == null) {
            continue;
        }
        var title = title_node.innerHTML.trim()
            .replace(/&nbsp;|\u00a0/g, " ")
            .replace(/ {4}/g, "\t");
        // The page column holds a range such as "12-15"; only the first page is needed.
        var first_page = page_node.textContent.trim().split("-")[0];
        outline = outline + title + "\t" + first_page + "\r\n";
    }
    return outline;
}
// onload callback for the chapter-list request issued with GM_xmlhttpRequest.
//
// xhr: the response object; only xhr.responseText is read. Every <tr> found in it is handed to
//      get_content to build the outline text.
//
// Adds two controls after the first ".btn-dlpdf" element: one copies the outline to the
// clipboard, the other offers it as a text-file download.
//
// The object URL for the download is created once, on the first click, and reused afterwards.
// The previous code created a new URL on every click and then passed the Blob (not the URL
// string) to URL.revokeObjectURL, which releases nothing, so every click leaked one object
// URL. The URL is intentionally not revoked inside the click handler: the link is only
// followed after the handler returns, and revoking earlier would break the download.
//
// NOTE(review): the strings handed to $() for the two controls contain no markup as they
// appear in this file, although the download handler looks for an <a> inside the control -
// confirm the intended button markup against the published script.
function manage_contents(xhr) {
    var cnt_list = $('tr', xhr.responseText); // rows of the chapter list
    var contents = get_content(cnt_list); // outline text
    var outline_url = null; // object URL of the outline, created lazily on first download
    // Add the "copy outline" control
    $('.btn-dlpdf').first().after($('目录复制').click(function() {
        GM_setClipboard(contents); // use the clipboard helper provided by the userscript manager
        window.alert('目录已复制到剪贴板');
    }));
    // Add the "download outline" control
    $('.btn-dlpdf').first().after($('目录下载').click(function() {
        if (outline_url === null) {
            var data = new Blob([contents],{type:"text/plain; charset=UTF-8"});
            outline_url = window.URL.createObjectURL(data);
        }
        // The file name is computed at click time from the page title.
        $(this).find('a').attr("download", '目录_' + $('.wx-tit h1:first-child()').text().trim() + '.txt');
        $(this).find('a').attr("href", outline_url);
        window.alert("目录索引已保存, 请使用PdgCntEditor软件将目录整合到PDF中");
    }));
}