Source code for wdbibtex.word

import glob
import os
import pathlib
import shutil
import win32com.client as client

import wdbibtex


class WdBibTeX:
    """BibTeX toolkit for MS Word.

    WdBibTeX is a MS Word wrapper for BibTeX citation conversion.
    WdBibTeX extracts LaTeX and BibTeX commands from a Word file,
    and copies them to a dummy .tex file in the working directory.
    By building the LaTeX project with the old-style LaTeX+BibTeX process,
    WdBibTeX obtains BibTeX-processed bibliography texts and
    citation numbers. Finally, WdBibTeX replaces the original LaTeX and
    BibTeX commands in the Word file with the BibTeX-processed
    bibliography texts and citation numbers.

    Parameters
    ----------
    file : str or path object
        Target word file with .docx extension.
    copy_suffix : str, default '_bib'
        Appended text to a copied word file.
        WdBibTeX operates the copied file for safety.
    workdir : str or path object, default '.tmp'
        Working directory of latex process.
        The working directory will be removed by WdBibTeX.clear().

    Examples
    --------
    >>> from wdbibtex import WdBibTeX
    >>> wd = WdBibTeX('sample.docx')  # doctest: +SKIP
    >>> wd.build()  # doctest: +SKIP
    >>> wd.close()  # doctest: +SKIP
    """

    # Word object-model enumeration values passed to the COM calls below.
    _WD_REPLACE_NONE = 0  # wdReplaceNone
    _WD_FIND_CONTINUE = 1  # wdFindContinue
    _WD_REPLACE_ALL = 2  # wdReplaceAll
    _WD_STORY = 6  # wdStory
    _WD_FORMAT_PDF = 17  # wdFormatPDF
    _WD_SAVE_CHANGES = -1  # wdSaveChanges

    def __init__(
            self,
            file,
            copy_suffix='_bib',
            workdir='.tmp',
    ):
        """Constructor of WdBibTeX.

        Only resolves the paths involved; Word itself is not started
        until open() is called.
        """
        self.__origin_file = (pathlib.Path.cwd() / file).resolve()
        self.__docxdir = self.__origin_file.parent
        # The working copy lives next to the original:
        # <stem><copy_suffix><suffix>.
        self.__target_file = self.__docxdir / (
            self.__origin_file.stem
            + copy_suffix
            + self.__origin_file.suffix
        )
        self.__workdir = self.__docxdir / workdir

    @property
    def original_file(self):
        """[Read only] Returns original word file.
        """
        return self.__origin_file

    @property
    def target_file(self):
        """[Read only] Returns operating word file.
        """
        return self.__target_file

    @property
    def workdir(self):
        """[Read only] Returns LaTeX working directory.
        """
        return self.__workdir

    def clear(self):
        """Clear auxiliary files on working directory.

        Removes the whole working directory tree.
        """
        shutil.rmtree(self.workdir)

    def close(self, clear=False):
        """Close word file and word application.

        Close word file after saving.
        If no other file opened, quit Word application too.

        Parameters
        ----------
        clear : bool, default False
            If True, remove working directory of latex process.

        See also
        --------
        open : Open word file.
        """
        # Save, then close the operating document.
        self.__dc.Save()
        self.__dc.Close()

        # Quit Word application if no other opened document.
        if len(self.__ap.Documents) == 0:
            self.__ap.Quit()

        # Clean working directory.
        if clear:
            self.clear()

    def updatetoc(self):
        """Update all table of contents in the document.
        """
        for toc in self.__dc.TablesOfContents:
            toc.Update()

    def exportpdf(self):
        """Export current docx file to pdf.

        The pdf is written next to the operating word file, with the
        same name and a .pdf extension.
        """
        fn = os.path.splitext(self.__target_file)[0] + '.pdf'
        self.__dc.SaveAs2(fn, self._WD_FORMAT_PDF)

    def build(self, bib=None, bst=None):
        r"""Build word file with latex citations.

        Build word file with latex citation key of \\cite{} and
        \\thebibliography.
        This is realized by the following five steps:

        1. Find latex citations and thebibliography key.
        2. Generate dummy LaTeX file.
        3. Build LaTeX project.
        4. Parse LaTeX artifacts of aux and bbl.
        5. Replace LaTeX keys in word file.

        The operating word file is opened by this method and left open;
        call close() to save and close it.

        Parameters
        ----------
        bib : str or None, default None
            Bibliography file to be used.
            If None, all .bib files placed in the same directory
            of target .docx file will be used.
        bst : str or None, default None
            Bibliography style.
            If None, .bst file placed in the same directory
            of target .docx file is used.
        """
        self.open()

        # Stage every style and database file beside the dummy .tex file.
        os.makedirs(self.__workdir, exist_ok=True)
        for pattern in ('*.bst', '*.bib'):
            for b in glob.glob(os.path.join(self.__docxdir, pattern)):
                shutil.copy(b, self.__workdir)

        tx = wdbibtex.LaTeX(workdir=self.__workdir)
        tx.preamble = self.read_preamble()
        if bst:
            # Overwrite preamble in docx with given command line argument.
            tx.bibliographystyle = bst
        else:
            # NOTE(review): this self-assignment presumably re-runs the
            # bibliographystyle setter so that it can fall back to a .bst
            # file found in the project directory -- confirm against
            # wdbibtex.LaTeX before simplifying.
            tx.bibliographystyle = tx.bibliographystyle

        self.__cites = self.find_all('\\\\cite\\{*\\}')
        self.__thebibliographies = self.find_all('\\\\thebibliography')

        # Build latex document.
        context = '\n'.join([cite for cite, _, _ in self.__cites])
        tx.write(context, bib=bib)
        tx.build()
        tx.read_aux()
        tx.read_bbl()

        # Replace \thebibliography, last occurrence first so that the
        # offsets of the earlier occurrences stay valid.
        for text, start, end in self.__thebibliographies[::-1]:
            if not text:
                # Empty text is the "nothing found" entry of find_all
                # (read_preamble relies on the same convention); acting
                # on its collapsed range would edit unrelated content.
                continue
            rng = self.__dc.Range(Start=start, End=end)
            rng.Delete()
            rng.InsertAfter(tx.thebibliography)

        # Citation numbers are superscripted when the cite package is
        # loaded with the 'superscript' or 'super' option.
        cite_options = tx.is_package_used('cite')
        superscript = isinstance(cite_options, list) and (
            'superscript' in cite_options or 'super' in cite_options
        )

        # Replace \cite{*}, again from the last occurrence backwards.
        for key, start, end in self.__cites[::-1]:
            if not key:
                # "Nothing found" entry, see above.
                continue
            if superscript:
                rng = self.__dc.Range(Start=start, End=end)
                rng.Font.Superscript = True
            self.replace_all(self._escape_wildcards(key), tx.cite(key))

        # Replace from \begin{preamble} to \end{preamble}^13
        # Note ^13 corresponds carriage return.
        self.replace_all(
            '\\\\begin\\{preamble\\}*\\\\end\\{preamble\\}^13', ''
        )

    def find_all(self, key):
        """Find all keys from word file.

        Find all keys in word document, first in the main text and then
        in every shape of the document. Searching starts from the
        current selection and wraps around when the document end is
        reached. The key is interpreted as a Word wildcard pattern.
        MatchFuzzy search is disabled.

        Parameters
        ----------
        key : str
            A text to search in word document.

        Returns
        -------
        list
            A list of list. Each list element is
            [found text in str, start place in int, end place in int].
            Elements appear in the order they were found. The entry
            ['', 0, 0] is removed when at least one other entry exists;
            a lone ['', 0, 0] is what read_preamble treats as
            "nothing found".

        See Also
        --------
        replace_all : Replace found keys.
        """
        # Main text: keep searching until the wrapped search comes back
        # to a hit that was already recorded.
        self._reset_find()
        found = []
        while True:
            self._execute_find(key, '', self._WD_REPLACE_NONE)
            hit = self._selection_hit()
            if hit in found:
                break
            found.append(hit)

        # Shapes (collection is 1-based): search inside each one.
        for index in range(1, self.__dc.Shapes.Count + 1):
            self.__dc.Shapes(index).Select()
            whole_shape = self.__sl.Range
            self._reset_find()
            searched = []
            while True:
                self._execute_find(key, '', self._WD_REPLACE_NONE)
                hit = self._selection_hit()
                if hit in searched:
                    break
                searched.append(hit)
                if hit in found:
                    break
                # Skip an empty selection and the untouched whole-shape
                # selection; neither is a real match.
                if hit[0] == '' or hit[0] == str(whole_shape):
                    continue
                found.append(hit)

        # Leave the selection at the start of the story so the next
        # search does not start from a stale hit.
        self.__sl.HomeKey(self._WD_STORY)

        sentinel = ['', 0, 0]
        if len(found) >= 2 and sentinel in found:
            found.remove(sentinel)
        return found

    def open(self):
        """Open copied word document.

        Firstly copy word file with appending suffix.
        Then open the file.

        See also
        --------
        close : Close document and application.
        """
        self.__ap = client.Dispatch('Word.Application')
        self.__ap.Visible = True

        # Copy original file to operating file for safety.
        try:
            shutil.copy2(self.__origin_file, self.__target_file)
        except PermissionError:
            # The copy is probably still open in Word from an earlier
            # run: save and close that document, then copy again.
            for d in self.__ap.Documents:
                docpath = str(os.path.join(d.Path, d.Name))
                if docpath == str(self.__target_file):
                    d.Close(SaveChanges=self._WD_SAVE_CHANGES)
                    break
            shutil.copy2(self.__origin_file, self.__target_file)

        self.__dc = self.__ap.Documents.Open(str(self.__target_file))
        self.__sl = self.__ap.Selection

    def read_preamble(self):
        r"""Read preamble contents if exists.

        WdBibTeX detects special command of \begin{preamble} and
        \end{preamble} commands from target .docx file.
        Contents written in the two commands will be copied to
        the preamble of .tex file.
        If these commands are not found, the following default
        preamble is used.

        .. code-block:: text

            \documentclass[latex]{article}
            \usepackage{cite}

        Returns
        -------
        None or str
            None if no preamble texts exists, str if preamble exists.
            Carriage returns of the word text are converted to
            line feeds.

        Raises
        ------
        ValueError
            If only one of \begin{preamble} or \end{preamble}
            found in file.
            Or, if two or more \begin{preamble} or \end{preamble} found.
        """
        not_found = [['', 0, 0]]
        bgn_pa = self.find_all("\\\\begin\\{preamble\\}")
        end_pa = self.find_all("\\\\end\\{preamble\\}")

        if bgn_pa == not_found and end_pa == not_found:
            return None
        if bgn_pa == not_found or end_pa == not_found:
            raise ValueError(
                'One of \\begin{preamble} or \\end{preamble} not found.'
            )
        if len(bgn_pa) > 1 or len(end_pa) > 1:
            raise ValueError(
                'Two or more \\begin{preamble} or \\end{preamble} found.'
            )

        # Text between the end of \begin{preamble} and the start of
        # \end{preamble}.
        pa = self.__dc.Range(
            Start=bgn_pa[0][2],
            End=end_pa[0][1]
        )
        return str(pa).replace('\r', '\n')

    def replace_all(self, key, val):
        """Replace all keys in document with value.

        Replace all keys in word document with value, first in the main
        text and then in every shape of the document. Searching starts
        from the current selection and wraps around when the document
        end is reached. The key is interpreted as a Word wildcard
        pattern. MatchFuzzy search is disabled.

        Parameters
        ----------
        key : str
            Original text.
        val : str
            Replacing text.

        See Also
        --------
        find_all : Find all keys in the document.
        """
        # Main text.
        self._reset_find()
        self._execute_find(key, val, self._WD_REPLACE_ALL)
        self.__sl.HomeKey(self._WD_STORY)

        # Shapes (collection is 1-based).
        for index in range(1, self.__dc.Shapes.Count + 1):
            self.__dc.Shapes(index).Select()
            self._reset_find()
            self._execute_find(key, val, self._WD_REPLACE_ALL)
        self.__sl.HomeKey(self._WD_STORY)

    @staticmethod
    def _escape_wildcards(key):
        """Escape backslashes and braces of key for a wildcard search."""
        # Backslashes first, so the escapes added for braces are kept.
        escaped = key.replace('\\', '\\\\')
        escaped = escaped.replace('{', '\\{')
        return escaped.replace('}', '\\}')

    def _reset_find(self):
        """Bind the Find object of the current selection, fuzzy off."""
        self.__fi = self.__sl.Find
        self.__fi.ClearFormatting()
        self.__fi.MatchFuzzy = False

    def _execute_find(self, key, replace_with, replace):
        """Run one forward, wrapping wildcard search on the bound Find."""
        self.__fi.Execute(
            key,  # FindText
            False,  # MatchCase
            False,  # MatchWholeWord
            True,  # MatchWildcards
            False,  # MatchSoundsLike
            False,  # MatchAllWordForms
            True,  # Forward
            self._WD_FIND_CONTINUE,  # Wrap
            False,  # Format
            replace_with,  # ReplaceWith
            replace,  # Replace
        )

    def _selection_hit(self):
        """Return the current selection as [text, start, end]."""
        rng = self.__sl.Range
        return [str(rng), rng.Start, rng.End]