// ==UserScript==
// @id             CNKI_PDF_Supernova
// @name           知网PDF下载助手
// @version        3.2.1
// @namespace      https://github.com/supernovaZhangJiaXing/Tampermonkey/
// @author         Supernova
// @description    直接以PDF格式下载知网上的文献, 包括期刊论文和博硕士论文
// @include        http*://*.cnki.net/*
// @include        http*://*.cnki.net.*/*
// @include        */DefaultResult/Index*
// @include        */KNS8/AdvSearch*
// @include        */detail.aspx*
// @include        */CatalogViewPage.aspx*
// @include        */Article/*
// @include        */kns/brief/*
// @include        */kns55/brief/*
// @include        */grid2008/brief/*
// @include        */detail/detail.aspx*
// @exclude        http://image.cnki.net/*
// @require        https://unpkg.com/pdf-lib@1.13.0/dist/pdf-lib.js
// @require        https://cdn.jsdelivr.net/gh/tabedit/code-segment@f730e9d9573c4ca7e758766e6a8fb282faae38af/outline.esm.js
// @run-at         document-idle
// @grant          unsafeWindow
// @grant          GM_setClipboard
// @grant          GM_xmlhttpRequest
// @downloadURL    none
// ==/UserScript==

'use strict';

// The page's own jQuery. The @include patterns are broad (e.g. */Article/*),
// so the script can be injected into pages that have no jQuery at all; in that
// case `$` stays undefined and the script does nothing instead of throwing.
var $ = typeof unsafeWindow !== 'undefined' ? unsafeWindow.jQuery : undefined;
var contents = '';
var pdf;

// Labels of the format toggle shown on the search page.
var PDF_LABEL = '当前默认下载格式: PDF (点击转换)';
var CAJ_LABEL = '当前默认下载格式: CAJ (点击转换)';

// NOTE(review): the original link targets were lost from the source this file
// was restored from. SCRIPT_HELP_URL is taken from the @namespace header;
// PDG_CNT_EDITOR_URL could not be recovered.
// TODO: restore the real PdgCntEditor download URL (link renders without href
// while this is empty).
var SCRIPT_HELP_URL = 'https://github.com/supernovaZhangJiaXing/Tampermonkey/';
var PDG_CNT_EDITOR_URL = '';

if (typeof $ === 'function') {
    $(document).ready(init_page);
}

/**
 * Entry point: decides from the current URL which page enhancements to apply.
 * Search pages (URL contains "kns8") get the PDF/CAJ format toggle; thesis
 * detail pages (detail.aspx with dbcode CDFD/CMFD/...) get the extra buttons.
 */
function init_page() {
    var is_detail_page = window.location.href.indexOf('detail.aspx') !== -1;
    if (!is_detail_page) {
        // Only the literature search page; avoids showing the toggle elsewhere.
        if (window.location.href.indexOf('kns8') !== -1) {
            setup_search_page();
        }
        return;
    }
    // Only thesis (master/doctoral) detail pages are touched, otherwise the
    // journal article page layout would be affected.
    if (location.search.match(/dbcode=C[DM][FM]D&/i)) {
        setup_thesis_detail_page();
    }
}

/**
 * Adds the "default download format" toggle to the search page and rewrites
 * every `.downloadlink` href to the selected format.
 */
function setup_search_page() {
    var use_pdf = true;
    var convert_box = document.createElement('div');
    convert_box.innerText = PDF_LABEL;
    convert_box.className = 'ecp_tn-tab';
    convert_box.style.cssText = 'color: blue; font-weight: bolder';
    convert_box.onclick = function () {
        use_pdf = !use_pdf;
        convert_box.innerText = use_pdf ? PDF_LABEL : CAJ_LABEL;
        $('.downloadlink').attr('href', use_pdf ? toPDF : toCAJ);
    };

    // Every time a result grid is (re)loaded its links are fresh, so reset the
    // toggle to PDF to keep the label and the links in agreement.
    $(document).ajaxSuccess(function (event, xhr, settings) {
        var url = settings && settings.url ? String(settings.url) : '';
        if (url.indexOf('/Brief/GetGridTableHtml') !== -1) {
            use_pdf = true;
            $('.downloadlink').attr('href', toPDF);
            convert_box.innerText = PDF_LABEL;
        }
    });

    // The original called insertBefore(box, mycnki.nextElementSibling) where
    // `mycnki` is a jQuery object, so the reference node was always undefined
    // and the box was appended as the parent's last child. That effective
    // placement is kept here, written explicitly.
    var mycnki = $('#icnki');
    if (mycnki.length) {
        mycnki.parent().append(convert_box);
    }
}

/**
 * Enhances a thesis detail page: renames the whole-book button to "CAJ",
 * replaces the paged-download button with a direct PDF download, requests the
 * chapter list for the table-of-contents buttons, and adds help/tool links.
 */
function setup_thesis_detail_page() {
    // Rename "whole book download" to "CAJ download".
    var caj_btn = $('.btn-dlcaj').first();
    if (caj_btn.length) {
        caj_btn.html(caj_btn.html().replace('整本', 'CAJ'));
    }

    // Derive the PDF url from the paged-download button, which is removed.
    var paged_href = $('.btn-dlpdf').remove().find('a').attr('href');
    if (paged_href) {
        var pdf_url = paged_href.replace('&dflag=downpage', '&dflag=pdfdown');
        // NOTE(review): markup reconstructed. The class must be btn-dlpdf
        // because manage_contents() anchors its buttons on it.
        var pdf_down = $('<li class="btn-dlpdf"></li>')
            .append($('<a target="_blank"></a>').attr('href', pdf_url).text('PDF下载'));
        caj_btn.after(pdf_down);
    }

    // The chapter-download button carries the query string of the catalog page.
    var chapter_href = $('.btn-dlcaj:eq(1)').find('a').attr('href') || '?';
    var query = chapter_href.match(/\?.*/);
    var content_url = 'https://chn.oversea.cnki.net/kcms/download.aspx' + (query ? query[0] : '?');
    GM_xmlhttpRequest({method: 'GET', url: content_url, onload: manage_contents});

    // Usage instructions on the right-hand side.
    // NOTE(review): markup and link target reconstructed, see SCRIPT_HELP_URL.
    $('.operate-btn').append(
        $('<li></li>').append(
            $('<a target="_blank"></a>').attr('href', SCRIPT_HELP_URL).text('脚本说明')));

    add_tools_box();
}

/**
 * Appends the tool links (PdgCntEditor download, Python helper for macOS) to
 * the bottom-right area. The helper script is offered as a generated file.
 */
function add_tools_box() {
    // NOTE(review): markup reconstructed; only the link texts survived.
    var pdg_link = $('<a target="_blank"></a>').text('目录合并软件: PdgContentEditor下载');
    if (PDG_CNT_EDITOR_URL) {
        pdg_link.attr('href', PDG_CNT_EDITOR_URL);
    }
    var mac_link = $('<a></a>').text('Mac系统合并目录办法: 保存此代码并用Python 3运行 (需先安装PyPDF2库)');
    var box = $('<div></div>')
        .append($('<p></p>').append(pdg_link))
        .append($('<p></p>').append(mac_link));
    box.click(function () {
        // The handler runs while the click bubbles, i.e. before the browser
        // follows the link, so the href is in place in time.
        attach_text_download($(this).find('a').last(), gen_py_for_FUCKING_mac(), 'pdf_utils.py');
    });
    $('.opts-down').append(box);
}

/**
 * Turns an anchor into a download link for the given text.
 *
 * The object URL is created once per anchor and reused. It is deliberately not
 * revoked inside the click handler: the browser only follows the link after
 * the handler returns, so revoking there would break the download. (The
 * original passed the Blob itself to revokeObjectURL, which did nothing and
 * leaked a new URL on every click.)
 *
 * @param {jQuery} anchor    the <a> element(s) to prepare
 * @param {string} text      file content
 * @param {string} file_name suggested file name
 */
function attach_text_download(anchor, text, file_name) {
    if (anchor.data('blobUrl')) {
        return;
    }
    var data = new Blob([text], {type: 'text/plain; charset=UTF-8'});
    var url = window.URL.createObjectURL(data);
    anchor.attr('download', file_name);
    anchor.attr('href', url);
    anchor.data('blobUrl', url);
}

/**
 * Returns the source of a Python 3 script that merges a bookmark list
 * (one "title<TAB>page" per line) into a PDF using PyPDF2.
 *
 * Backslash escapes that must reach Python as escapes are written as `\\n` /
 * `\\t`; a plain `\n` inside a Python string literal would be emitted as a raw
 * line break and make the generated script a syntax error.
 *
 * @returns {string} Python source code, terminated by a blank line
 */
function gen_py_for_FUCKING_mac() {
    return [
        "# coding:utf-8",
        "# 往pdf文件中添加书签",
        "# 来源: https://www.jianshu.com/p/1aac3ae4d620",
        "# 执行前需要安装库 PyPDF2: pip install 'PyPDF2<3' (3.0起已移除PdfFileReader等旧接口)",
        "import os",
        "from PyPDF2 import PdfFileReader as reader, PdfFileWriter as writer",
        "",
        "",
        "class PDFHandleMode(object):",
        "    '''",
        "    处理PDF文件的模式",
        "    '''",
        "    # 保留源PDF文件的所有内容和信息,在此基础上修改",
        "    COPY = 'copy'",
        "    # 仅保留源PDF文件的页面内容,在此基础上修改",
        "    NEWLY = 'newly'",
        "",
        "",
        "def add_outline(file_name, content_file_name):",
        "    '''",
        "    给PDF文件挂载书签",
        "    :param file_name: PDF文件路径",
        "    :param content_file_name: 目录文件路径",
        "    '''",
        "    pdf_handler = MyPDFHandler(file_name, mode=PDFHandleMode.NEWLY)",
        "    pdf_handler.add_bookmarks_by_read_txt(content_file_name)",
        "    pdf_handler.save2file(file_name.split('.')[0] + u'-目录书签版.pdf')",
        "",
        "",
        "def read_bookmarks_from_txt(txt_file_path, page_offset=0):",
        "    '''",
        "    从文本文件中读取书签列表",
        "    文本文件有若干行,每行一个书签,内容格式为:",
        "    书签标题\t页码",
        "    注:中间用空格隔开,页码为1表示第1页",
        "    :param txt_file_path: 书签信息文本文件路径",
        "    :param page_offset: 页码便宜量,为0或正数,即由于封面、目录等页面的存在,在PDF中实际的绝对页码比在目录中写的页码多出的差值",
        "    :return: 书签列表",
        "    '''",
        "    bookmarks = []",
        "    with open(txt_file_path, 'r', encoding='utf-8') as fin:",
        "        for line in fin:",
        "            line = line.rstrip()",
        "            if not line:",
        "                continue",
        "            # 以'@'作为标题、页码分隔符",
        "            print('read line is: {0}'.format(line))",
        "            try:",
        "                title = line.split('\\t')[0].rstrip()",
        "                page = line.split('\\t')[1].strip()",
        "            except IndexError as msg:",
        "                print(msg)",
        "                continue",
        "            # title和page都不为空才添加书签,否则不添加",
        "            if title and page:",
        "                try:",
        "                    page = int(page) + page_offset",
        "                    bookmarks.append((title, page))",
        "                except ValueError as msg:",
        "                    print(msg)",
        "",
        "    return bookmarks",
        "",
        "",
        "class MyPDFHandler(object):",
        "    '''",
        "    封装的PDF文件处理类",
        "    '''",
        "",
        "    def __init__(self, pdf_file_path, mode=PDFHandleMode.COPY):",
        "        '''",
        "        用一个PDF文件初始化",
        "        :param pdf_file_path: PDF文件路径",
        "        :param mode: 处理PDF文件的模式,默认为PDFHandleMode.COPY模式",
        "        '''",
        "        # 只读的PDF对象",
        "        self.__pdf = reader(pdf_file_path, strict=False)",
        "",
        "        # 获取PDF文件名(不带路径)",
        "        self.file_name = os.path.basename(pdf_file_path)",
        "        #",
        "        self.metadata = self.__pdf.getXmpMetadata()",
        "        #",
        "        self.doc_info = self.__pdf.getDocumentInfo()",
        "        #",
        "        self.pages_num = self.__pdf.getNumPages()",
        "",
        "        # 可写的PDF对象,根据不同的模式进行初始化",
        "        self.__writeable_pdf = writer()",
        "        if mode == PDFHandleMode.COPY:",
        "            self.__writeable_pdf.cloneDocumentFromReader(self.__pdf)",
        "        elif mode == PDFHandleMode.NEWLY:",
        "            for idx in range(self.pages_num):",
        "                page = self.__pdf.getPage(idx)",
        "                self.__writeable_pdf.insertPage(page, idx)",
        "",
        "    def save2file(self, new_file_name):",
        "        '''",
        "        将修改后的PDF保存成文件",
        "        :param new_file_name: 新文件名,不要和原文件名相同",
        "        :return: None",
        "        '''",
        "        # 保存修改后的PDF文件内容到文件中",
        "        with open(new_file_name, 'wb') as fout:",
        "            self.__writeable_pdf.write(fout)",
        "        print('save2file success! new file is: {0}'.format(new_file_name))",
        "",
        "    def add_one_bookmark(self, title, page, parent=None, color=None, fit='/Fit'):",
        "        '''",
        "        往PDF文件中添加单条书签,并且保存为一个新的PDF文件",
        "        :param str title: 书签标题",
        "        :param int page: 书签跳转到的页码,表示的是PDF中的绝对页码,值为1表示第一页",
        "        :paran parent: A reference to a parent bookmark to create nested bookmarks.",
        "        :param tuple color: Color of the bookmark as a red, green, blue tuple from 0.0 to 1.0",
        "        :param list bookmarks: 是一个'(书签标题,页码)'二元组列表,举例:[(u'tag1',1),(u'tag2',5)],页码为1代表第一页",
        "        :param str fit: 跳转到书签页后的缩放方式",
        "        :return: None",
        "        '''",
        "        # 为了防止乱码,这里对title进行utf-8编码",
        "        # Supernova: 好像不需要, 先不编码",
        "        self.__writeable_pdf.addBookmark(title, page - 1, parent=parent, color=color, fit=fit)",
        "        print('add_one_bookmark success! bookmark title is: {0}'.format(title))",
        "",
        "    def add_bookmarks(self, bookmarks):",
        "        '''",
        "        批量添加书签",
        "        :param bookmarks: 书签元组列表,其中的页码表示的是PDF中的绝对页码,值为1表示第一页",
        "        :return: None",
        "        '''",
        "        for title, page in bookmarks:",
        "            self.add_one_bookmark(title, page)",
        "        print('add_bookmarks success! add {0} pieces of bookmarks to PDF file'.format(len(bookmarks)))",
        "",
        "    def add_bookmarks_by_read_txt(self, txt_file_path, page_offset=0):",
        "        '''",
        "        通过读取书签列表信息文本文件,将书签批量添加到PDF文件中",
        "        :param txt_file_path: 书签列表信息文本文件",
        "        :param page_offset: 页码便宜量,为0或正数,即由于封面、目录等页面的存在,在PDF中实际的绝对页码比在目录中写的页码多出的差值",
        "        :return: None",
        "        '''",
        "        bookmarks = read_bookmarks_from_txt(txt_file_path, page_offset)",
        "        self.add_bookmarks(bookmarks)",
        "        print('add_bookmarks_by_read_txt success!')",
        "",
        "",
        "def main(bookname, outline):",
        "    # 读取PDF文件,创建PdfFileReader对象",
        "    book = reader(bookname, strict=False)",
        "",
        "    # 创建PdfFileWriter对象,并用拷贝reader对象进行初始化",
        "    pdf = writer()",
        "    pdf.cloneDocumentFromReader(book)",
        "",
        "    # 添加书签",
        "    # 注意:页数是从0开始的,中文要用unicode字符串,否则会出现乱码",
        "    # 如果这里的页码超过文档的最大页数,会报IndexError异常",
        "    pdf.addBookmark(outline, 0)",
        "",
        "    # 保存修改后的PDF文件内容到文件中",
        "    # 注意:这里必须用二进制的'wb'模式来写文件,否则写到文件中的内容都为乱码",
        "    with open('book-with-bookmark.pdf', 'wb') as fout:",
        "        pdf.write(fout)",
        "",
        "",
        "if __name__ == '__main__':",
        "    pdf = input('请输入待添加书签的PDF文件的全路径或将其拖至此处: \\n')",
        "    outline = input('请输入待添加的目录索引文件的全路径或将其拖至此处: \\n')",
        "    add_outline(pdf.rstrip(), outline.rstrip())",
        "    input('添加完成, 按任意键退出')",
        "",
        ""
    ].join("\n");
}

/**
 * Replaces the `&dflag=...` parameter of a download url, or appends it when
 * the url has none.
 *
 * @param {string} href download url
 * @param {string} flag new dflag value, e.g. "pdfdown" or "nhdown"
 * @returns {string} url carrying exactly the requested flag value
 */
function with_download_flag(href, flag) {
    // The `|$` alternative matches the end of the string, so a url without a
    // dflag parameter gets one appended.
    return href.replace(/&dflag=\w*|$/, '&dflag=' + flag);
}

/**
 * jQuery `.attr('href', fn)` callback: CAJ variant of the link's url.
 * Called with `this` bound to the link element; the result is also stored in
 * the element's jQuery data under "CAJ".
 *
 * @returns {string} url with dflag=nhdown
 */
function toCAJ() {
    var url = with_download_flag(this.href, 'nhdown');
    $(this).data('CAJ', url);
    return url;
}

/**
 * jQuery `.attr('href', fn)` callback: PDF variant of the link's url.
 * Called with `this` bound to the link element; the result is also stored in
 * the element's jQuery data under "PDF".
 *
 * @returns {string} url with dflag=pdfdown
 */
function toPDF() {
    var url = with_download_flag(this.href, 'pdfdown');
    $(this).data('PDF', url);
    return url;
}

/**
 * Builds the bookmark list text from the rows of the chapter table.
 *
 * Each row yields "title<TAB>first page<CR><LF>"; groups of four leading
 * spaces in a title become tabs (nesting level). The last entry of the list is
 * ignored, as in the original ("the last one is text"). Rows that do not have
 * the expected cell structure are skipped instead of aborting the whole list.
 *
 * @param {ArrayLike<Node>} cnt_list table rows (`tr`) of the catalog page
 * @returns {string} bookmark list, empty string when there are no usable rows
 */
function get_content(cnt_list) {
    var lines = [];
    for (var i = 0; i < cnt_list.length - 1; i++) {
        var cells = cnt_list[i] ? cnt_list[i].childNodes : null;
        var title_cell = cells ? cells[1] : null;
        var page_cell = cells ? cells[3] : null;
        var title_node = title_cell && title_cell.childNodes ? title_cell.childNodes[1] : null;
        var page_node = page_cell && page_cell.childNodes ? page_cell.childNodes[0] : null;
        if (!title_node || !page_node || typeof title_node.innerHTML !== 'string') {
            continue;
        }
        // NOTE(review): the first pattern was damaged in the restored source;
        // innerHTML reports non-breaking spaces as "&nbsp;", so both the
        // entity and the raw character are normalised to a plain space.
        var title = title_node.innerHTML.trim()
            .replace(/&nbsp;|\u00a0/g, ' ')
            .replace(/ {4}/g, '\t');
        // CNKI lists a page range; only its first page is needed.
        var page = String(page_node.textContent).trim().split('-')[0];
        lines.push(title + '\t' + page + '\r\n');
    }
    return lines.join('');
}

/**
 * GM_xmlhttpRequest onload handler for the catalog page: extracts the table of
 * contents and adds "copy" and "download" buttons after the PDF button.
 *
 * @param {{responseText: string}} xhr response of the catalog request
 */
function manage_contents(xhr) {
    var cnt_list = $('tr', xhr.responseText); // rows of the chapter table
    var toc_text = get_content(cnt_list);

    // NOTE(review): markup of both buttons reconstructed; the class is assumed
    // to be btn-dlpdf so they are styled like the neighbouring button.
    var copy_btn = $('<li class="btn-dlpdf"></li>')
        .append($('<a></a>').text('目录复制'))
        .click(function () {
            GM_setClipboard(toc_text); // clipboard helper provided by the userscript manager
            window.alert('目录已复制到剪贴板');
        });
    $('.btn-dlpdf').first().after(copy_btn);

    var download_btn = $('<li class="btn-dlpdf"></li>')
        .append($('<a></a>').text('目录下载'))
        .click(function () {
            var file_name = '目录_' + $('.wx-tit h1:first-child').text().trim() + '.txt';
            attach_text_download($(this).find('a'), toc_text, file_name);
            window.alert("目录索引已保存, 请使用PdgCntEditor软件将目录整合到PDF中");
        });
    // Inserted second on purpose: it ends up between the PDF button and the
    // copy button, which is the order the original produced.
    $('.btn-dlpdf').first().after(download_btn);
}