Articles I've written for customers on IT issues.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

712 lines
19 KiB

4 years ago
  """
   Copyright 2009 Luca Trevisan

   Additional contributors: Radu Grigore

   LaTeX2WP version 0.6.2

   This file is part of LaTeX2WP, a program that converts
   a LaTeX document into a format that is ready to be
   copied and pasted into WordPress.

   You are free to redistribute and/or modify LaTeX2WP under the
   terms of the GNU General Public License (GPL), version 3
   or (at your option) any later version.

   I hope you will find LaTeX2WP useful, but be advised that
   it comes WITHOUT ANY WARRANTY; without even the implied warranty
   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GPL for more details.

   You should have received a copy of the GNU General Public
   License along with LaTeX2WP. If you can't find it,
   see <http://www.gnu.org/licenses/>.
  """
  import re
  from sys import argv

  from latex2wpstyle import *

  # Shared state computed from the settings imported from latex2wpstyle.

  # count maps a counter name to its current value: one counter per T[thm]
  # for every theorem-like environment in ThmEnvs, plus the section,
  # subsection and equation counters.
  count = dict()
  for thm in ThmEnvs:
      count[T[thm]] = 0
  count["section"] = count["subsection"] = count["equation"] = 0

  # ref maps a label name (with colons removed) to the number that
  # \ref / \eqref should display for it.
  ref = {}

  # Suffix appended after every formula handed to the WordPress renderer.
  endlatex = "&fg=" + textcolor

  # In HTML mode the end-of-proof mark is an image of \Box.  The backslash
  # is written as "\\" so the literal carries no invalid escape sequence;
  # the resulting string is unchanged.
  if HTML:
      endproof = "<img src=\"http://l.wordpress.com/latex.php?latex=\\Box&fg=000000\">"

  # Name of the theorem-like environment currently open ("" when none is).
  inthm = ""
  # At the beginning of the conversion, the commands \$, \% and \& (and the
  # bare characters > and <) are temporarily replaced by placeholders.
  # Each entry is a 4-item list:
  #   [source text, placeholder, text-mode replacement, math-mode replacement]
  # At the end, the placeholders found in text are replaced by the third
  # entry and the placeholders found in math by the fourth entry.
  esc = [
      ["\\$", "_dollar_", "&#36;", "\\$"],
      ["\\%", "_percent_", "&#37;", "\\%"],
      ["\\&", "_amp_", "&amp;", "\\&"],
      [">", "_greater_", ">", "&gt;"],
      ["<", "_lesser_", "<", "&lt;"],
  ]
  # Extra argument-less macros, appended to the macro table M that comes
  # from latex2wpstyle.  Each entry is [LaTeX command, replacement text];
  # the table is consumed by convertmacros().
  M = M + [
      ["\\more", "<!--more-->"],
      ["\\newblock", "\\\\"],
      ["\\sloppy", ""],
      ["\\S", "&sect;"],
  ]
  # Replacements applied (in this order, by plain str.replace) to the text
  # part of the document only, never to math: the LaTeX line break, the
  # explicit space, and accented letters mapped to HTML entities.
  # The entries for the o and u accents used to be listed twice; the second
  # copies could never match anything and have been dropped.
  Mnomath = [
      ["\\\\", "<br/>\n"],
      ["\\ ", " "],
      ["\\`a", "&agrave;"],
      ["\\'a", "&aacute;"],
      ["\\\"a", "&auml;"],
      ["\\aa ", "&aring;"],
      ["{\\aa}", "&aring;"],
      ["\\`e", "&egrave;"],
      ["\\'e", "&eacute;"],
      ["\\\"e", "&euml;"],
      ["\\`i", "&igrave;"],
      ["\\'i", "&iacute;"],
      ["\\\"i", "&iuml;"],
      ["\\`o", "&ograve;"],
      ["\\'o", "&oacute;"],
      ["\\\"o", "&ouml;"],
      # NOTE(review): \H o is the double-acute o (&#337;) in LaTeX; the
      # umlaut entity below is an approximation kept from the original.
      ["\\H o", "&ouml;"],
      ["\\`u", "&ugrave;"],
      ["\\'u", "&uacute;"],
      ["\\\"u", "&uuml;"],
      ["\\v{C}", "&#268;"],
  ]
  # Matches a single opening or closing curly brace.  Splitting a command
  # such as \href{a}{b} on it yields ["\href", "a", "", "b", ""], so the
  # first argument is at index 1 and the second at index 3.
  cb = re.compile("\\{|}")
  def extractbody(m) :
      """Return the cleaned-up body of the LaTeX source string `m`.

      Steps, in order:
        1. whitespace after "begin"/"end" commands is removed;
        2. if a document environment is present only its content is kept,
           otherwise the whole input is kept;
        3. the escaped characters listed in `esc` become placeholders;
        4. comments are removed, runs of blank lines become "<p>" and all
           other runs of newlines/spaces become a single space;
        5. text inside iffalse/iftex conditionals is dropped, text inside
           ifblog conditionals is kept;
        6. "$$ ... $$" is rewritten as a bracketed display and eqnarray*
           environments become array environments inside a display.

      Returns the transformed string.
      """
      # "\begin {x}" -> "\begin{x}", same for \end, so later patterns can
      # assume the brace follows the command directly.
      m = re.sub("\\\\begin\\s*", "\\\\begin", m)
      m = re.sub("\\\\end\\s*", "\\\\end", m)

      parse = re.split("\\\\begin\\{document}|\\\\end\\{document}", m)
      if len(parse) == 1:
          m = parse[0]
      else:
          m = parse[1]

      # Protect \$, \%, \&, > and < before '%' is interpreted as a comment.
      for e in esc:
          m = m.replace(e[0], e[1])

      # A comment runs up to and including the end of its line.
      m = re.sub("%.*?\n", " ", m)
      m = re.sub("\n\n+", "<p>", m)
      m = re.sub("(\n|[ ])+", " ", m)

      # Conditionals.  The lookahead keeps longer control words that merely
      # start with one of these names (e.g. \fill, \fint) from being taken
      # for \fi.  Pieces alternate: outside, inside, outside, ...
      ifcommands = re.compile("\\\\(?:iffalse|ifblog|iftex|fi)(?![a-zA-Z])")
      L = ifcommands.split(m)
      I = ifcommands.findall(m)
      m = L[0]
      # Integer division: "/" would give a float here on Python 3 and make
      # range() raise TypeError.
      for i in range(1, (len(L) + 1) // 2):
          if I[2*i - 2] == "\\ifblog":
              m = m + L[2*i - 1]
          m = m + L[2*i]

      # $$ ... $$  ->  \[ ... \]  (odd-numbered pieces are inside the math).
      L = m.split("$$")
      m = L[0]
      for i in range(1, (len(L) + 1) // 2):
          m = m + "\\[" + L[2*i - 1] + "\\]" + L[2*i]

      m = m.replace("\\begin{eqnarray*}", "\\[ \\begin{array}{rcl} ")
      m = m.replace("\\end{eqnarray*}", "\\end{array} \\]")

      return m
  def convertsqb(m) :
      r"""Rewrite optional [..] arguments as mandatory {..} arguments.

      "\item[label]" becomes "\nitem{label}" and "\begin{env}[name]"
      becomes "\nbegin{env}{name}", so that later passes only have to deal
      with brace-delimited arguments.  Text outside those two constructs is
      returned unchanged.
      """
      def _rebrace(command, renamed):
          # Build the re.sub callback for one command: rename it and turn
          # the square brackets of the matched text into braces.
          def _apply(match):
              s = match.group(0).replace(command, renamed)
              return s.replace("[", "{").replace("]", "}")
          return _apply

      m = re.sub("\\\\item\\s*\\[.*?\\]",
                 _rebrace("\\item", "\\nitem"), m)
      m = re.sub("\\\\begin\\s*\\{\\w+}\\s*\\[.*?\\]",
                 _rebrace("\\begin", "\\nbegin"), m)
      return m
  def converttables(m) :
      """Replace every tabular / btabular environment in `m` by an HTML table.

      Each matched environment is handed to convertonetable(); a btabular
      environment is converted with border=True, a tabular one with
      border=False.  The text between tables is returned unchanged.
      """
      retable = re.compile("\\\\begin\\s*\\{tabular}.*?\\\\end\\s*\\{tabular}"
                           "|\\\\begin\\s*\\{btabular}.*?\\\\end\\s*\\{btabular}")

      def _one(match):
          table = match.group(0)
          return convertonetable(table, "{btabular}" in table)

      # With a callable replacement the returned HTML is inserted verbatim.
      return retable.sub(_one, m)
  def convertmacros(m) :
      """Expand the argument-less macros listed in the table M.

      Every backslash followed by zero or more letters is looked up in M
      (a list of [command, replacement] pairs) and replaced when found;
      unknown commands are left as they are.  The table is scanned in
      order and the comparison is made against the current value, so a
      replacement that equals a later command is expanded again.
      """
      def _expand(match):
          command = match.group(0)
          for s1, s2 in M:
              if s1 == command:
                  command = s2
          return command

      return re.sub("\\\\[a-zA-Z]*", _expand, m)
  def convertonetable(m,border) :
      """Convert one tabular/btabular environment to an HTML table.

      m      -- the environment's source, from its begin command (with the
                column specification) to its end command.
      border -- when true the table is emitted with border="1".

      Cells are separated by '&' and rows by a double backslash.  The
      column specification supplies the alignment of each column (c, l or
      r); other characters in it, such as '|', are ignored.  Rows shorter
      than the specification are padded with empty cells.  Returns the
      HTML string.
      """
      tokens = re.compile("\\\\begin\\{tabular}\\s*\\{.*?}"
                          "|\\\\end\\{tabular}"
                          "|\\\\begin\\{btabular}\\s*\\{.*?}"
                          "|\\\\end\\{btabular}"
                          "|&|\\\\\\\\")

      align = {"c": "center", "l": "left", "r": "right"}

      T = tokens.findall(m)      # delimiters, T[0] being the begin command
      C = tokens.split(m)        # C[p] is the text that follows T[p-1]

      # cb.split on "\begin{tabular}{spec}" puts the spec at index 3.
      spec = [ch for ch in cb.split(T[0])[3] if ch in align]
      columns = len(spec)

      # Both variants open the first row (the bordered one used to omit it).
      if border:
          m = "<table border=\"1\" align=center><tr>"
      else:
          m = "<table align = center><tr>"

      p = 1
      i = 0                      # column index within the current row
      while T[p-1] != "\\end{tabular}" and T[p-1] != "\\end{btabular}":
          # Cells beyond the declared columns fall back to left alignment
          # instead of raising IndexError.
          cell_align = align[spec[i]] if i < columns else "left"
          m = m + "<td align=" + cell_align + ">" + C[p] + "</td>"
          p = p + 1
          i = i + 1
          if T[p-1] == "\\\\":
              # Pad by the number of columns still missing in this row.
              m = m + "<td></td>" * (columns - i)
              m = m + "</tr><tr>"
              i = 0
      m = m + "</tr></table>"
      return m
  def separatemath(m) :
      r"""Split `m` into its math and text parts.

      Math is anything of the form $...$, \begin{equation}...\end{equation}
      or \[...\].  Returns the pair (math, text) where `math` is the list
      of math fragments with their delimiters and `text` is the list of
      strings around them, so that len(text) == len(math) + 1.
      """
      mathre = re.compile("\\$.*?\\$"
                          "|\\\\begin\\{equation}.*?\\\\end\\{equation}"
                          "|\\\\\\[.*?\\\\\\]")
      return (mathre.findall(m), mathre.split(m))
  def processmath( M ) :
      """Format each math fragment in the list `M` for WordPress or HTML.

      Inline ($...$) fragments become either a "$latex ...$" shortcode or,
      when HTML is set, an <img> pointing at the WordPress LaTeX renderer.
      Displayed fragments are centred; equation environments additionally
      increment count["equation"] and get their number appended.  A label
      found in a displayed fragment is recorded in `ref` with the current
      equation number and turned into an HTML anchor.

      Returns a new list with one formatted string per input fragment.
      """
      mathdelim = re.compile("\\$"
                             "|\\\\begin\\{equation}"
                             "|\\\\end\\{equation}"
                             "|\\\\\\[|\\\\\\]")
      label = re.compile("\\\\label\\{.*?}")

      R = []
      for m in M:
          # md[0] is the opening delimiter ($, the equation begin command
          # or the display-math opener); mb[1] is the formula itself.
          md = mathdelim.findall(m)
          mb = mathdelim.split(m)

          if md[0] == "$":
              if HTML:
                  # URL-encode the formula for the renderer's query string.
                  m = m.replace("$", "")
                  m = m.replace("+", "%2B")
                  m = m.replace(" ", "+")
                  m = m.replace("'", "&#39;")
                  m = "<img src=\"http://l.wordpress.com/latex.php?latex=%7B" + m + "%7D" + endlatex + "\">"
              else:
                  m = "$latex {" + mb[1] + "}" + endlatex + "$"
          else:
              body = mb[1]
              if md[0].find("\\begin") != -1:
                  # Only equation environments are numbered.
                  count["equation"] += 1
                  body = body + "\\ \\ \\ \\ \\ (" + str(count["equation"]) + ")"
              if HTML:
                  body = body.replace("+", "%2B")
                  body = body.replace("&", "%26")
                  body = body.replace(" ", "+")
                  body = body.replace("'", "&#39;")
                  m = "<p align=center><img src=\"http://l.wordpress.com/latex.php?latex=\\displaystyle " + body + endlatex + "\"></p>\n"
              else:
                  m = "<p align=center>$latex \\displaystyle " + body + endlatex + "$</p>\n"
              if m.find("\\label") != -1:
                  # The already formatted equation is mnolab[0] + mnolab[1];
                  # the label command itself is mlab[0].
                  mnolab = label.split(m)
                  mlab = label.findall(m)
                  lab = cb.split(mlab[0])[1]
                  lab = lab.replace(":", "")
                  ref[lab] = count["equation"]
                  m = "<a name=\"" + lab + "\">" + mnolab[0] + mnolab[1] + "</a>"

          R.append(m)
      return R
  def convertcolors(m,c) :
      """Return the HTML for the begin or end of the colour environment `c`.

      m -- the matched command; if it contains "begin" an opening <span>
           with the colour colors[c] is returned, otherwise "</span>".
      c -- key into the `colors` table.
      """
      if "begin" in m:
          return "<span style=\"color:#" + colors[c] + ";\">"
      return "</span>"
  def convertitm(m) :
      """Return the HTML for the begin or end of an itemize environment.

      If `m` contains "begin" the list is opened with <ul>, otherwise it
      is closed with </ul>; the surrounding newlines are part of the
      returned string.
      """
      if "begin" in m:
          return "\n\n<ul>"
      return "\n</ul>\n\n"
  def convertenum(m) :
      """Return the HTML for the begin or end of an enumerate environment.

      If `m` contains "begin" the list is opened with <ol>, otherwise it
      is closed with </ol>; the surrounding newlines are part of the
      returned string.
      """
      if "begin" in m:
          return "\n\n<ol>"
      return "\n</ol>\n\n"
  def convertbeginnamedthm(thname,thm) :
      """Return the HTML that opens a named theorem-like environment.

      thname -- the name given in the optional argument.
      thm    -- the environment name.

      Increments the counter count[T[thm]], records `thm` as the currently
      open environment in the global `inthm`, and fills the _ThmType_,
      _ThmNumb_ and _ThmName_ placeholders of the `beginnamedthm` template.
      """
      global inthm

      count[T[thm]] += 1
      inthm = thm
      t = beginnamedthm.replace("_ThmType_", thm.capitalize())
      t = t.replace("_ThmNumb_", str(count[T[thm]]))
      t = t.replace("_ThmName_", thname)
      return t
  def convertbeginthm(thm) :
      """Return the HTML that opens the theorem-like environment `thm`.

      Increments the counter count[T[thm]], records `thm` as the currently
      open environment in the global `inthm`, and fills the _ThmType_ and
      _ThmNumb_ placeholders of the `beginthm` template.
      """
      global inthm

      count[T[thm]] += 1
      inthm = thm
      t = beginthm.replace("_ThmType_", thm.capitalize())
      t = t.replace("_ThmNumb_", str(count[T[thm]]))
      return t
  def convertendthm(thm) :
      """Return the HTML that closes a theorem-like environment.

      Clears the global `inthm` so that later labels refer to the section
      again.  `thm` is accepted for symmetry with convertbeginthm() but is
      not used.
      """
      global inthm

      inthm = ""
      return endthm
  def convertlab(m) :
      """Record a label command found in text and return its HTML anchor.

      The label name is the first brace argument of `m` with colons
      removed.  ref[name] is set to the number of the theorem-like
      environment currently open (per `inthm`), or to the current section
      number when none is open.
      """
      name = cb.split(m)[1].replace(":", "")
      if inthm != "":
          ref[name] = count[T[inthm]]
      else:
          ref[name] = count["section"]
      return "<a name=\"" + name + "\"></a>"
  def convertproof(m) :
      """Return `beginproof` if `m` contains "begin", otherwise `endproof`."""
      if "begin" in m:
          return beginproof
      return endproof
  def convertsection (m) :
      """Return the HTML heading for a section command.

      `m` is the matched command with its brace argument.  A numbered
      section increments count["section"], resets count["subsection"] and
      uses the `section` template; a starred one uses `sectionstar` and
      leaves the counters alone.  The _SecNumb_ and _SecName_ placeholders
      of the template are then filled in.
      """
      # L[0] is the command itself (with or without the star),
      # L[1] is the section name.
      L = cb.split(m)

      if "*" not in L[0]:
          t = section
          count["section"] += 1
          count["subsection"] = 0
      else:
          t = sectionstar

      t = t.replace("_SecNumb_", str(count["section"]))
      t = t.replace("_SecName_", L[1])
      return t
  def convertsubsection (m) :
      """Return the HTML heading for a subsection command.

      `m` is the matched command with its brace argument.  A numbered
      subsection increments count["subsection"] and uses the `subsection`
      template; a starred one uses `subsectionstar` and, like a starred
      section in convertsection(), does not consume a number.  The
      _SecNumb_, _SubSecNumb_ and _SecName_ placeholders of the template
      are then filled in.
      """
      # L[0] is the command itself (with or without the star),
      # L[1] is the subsection name.
      L = cb.split(m)

      if "*" not in L[0]:
          t = subsection
          # Only numbered subsections advance the counter.
          count["subsection"] += 1
      else:
          t = subsectionstar

      t = t.replace("_SecNumb_", str(count["section"]))
      t = t.replace("_SubSecNumb_", str(count["subsection"]))
      t = t.replace("_SecName_", L[1])
      return t
  def converturl (m) :
      """Return an HTML link for a two-argument href command.

      The first brace argument of `m` is used as the target and the second
      as the link text.
      """
      L = cb.split(m)
      return "<a href=\"" + L[1] + "\">" + L[3] + "</a>"
  def converturlnosnap (m) :
      """Return an HTML link carrying the class "snap_noshots".

      Same as converturl(): the first brace argument of `m` is the target
      and the second the link text; only the class attribute is added.
      """
      L = cb.split(m)
      return "<a class=\"snap_noshots\" href=\"" + L[1] + "\">" + L[3] + "</a>"
  def convertimage (m) :
      """Return a centred <img> element for an image command.

      The first brace argument of `m` is copied verbatim into the tag as
      its attributes and the second is used as the src.  Any further
      argument is ignored here.
      """
      L = cb.split(m)
      return "<p align=center><img " + L[1] + " src=\"" + L[3] + "\"></p>"
  def convertstrike (m) :
      """Return the first brace argument of `m` wrapped in <s>...</s>."""
      L = cb.split(m)
      return "<s>" + L[1] + "</s>"
  def processtext ( t ) :
      """Translate the text (non-math) part of the document to HTML.

      First the plain replacements in Mnomath are applied.  The text is
      then split on the control sequences recognised below (environment
      begin/end, items, labels, section and subsection headings, links,
      images and struck-out text); each one is replaced by the output of
      the matching convert* helper, and the text in between is copied
      through.  The result is finally passed to processfontstyle().

      Returns the converted string.
      """
      p = re.compile("\\\\begin\\{\\w+}"
                     "|\\\\nbegin\\{\\w+}\\s*\\{.*?}"
                     "|\\\\end\\{\\w+}"
                     "|\\\\item"
                     "|\\\\nitem\\s*\\{.*?}"
                     "|\\\\label\\s*\\{.*?}"
                     "|\\\\section\\s*\\{.*?}"
                     "|\\\\section\\*\\s*\\{.*?}"
                     "|\\\\subsection\\s*\\{.*?}"
                     "|\\\\subsection\\*\\s*\\{.*?}"
                     "|\\\\href\\s*\\{.*?}\\s*\\{.*?}"
                     "|\\\\hrefnosnap\\s*\\{.*?}\\s*\\{.*?}"
                     "|\\\\image\\s*\\{.*?}\\s*\\{.*?}\\s*\\{.*?}"
                     "|\\\\sout\\s*\\{.*?}")

      for s1, s2 in Mnomath:
          t = t.replace(s1, s2)

      ttext = p.split(t)          # plain text around the control sequences
      tcontrol = p.findall(t)     # the control sequences themselves

      w = ttext[0]
      for i, ctl in enumerate(tcontrol):
          # The order of the tests matters: the more specific command must
          # be tried before any command whose name it contains.
          if "{itemize}" in ctl:
              w = w + convertitm(ctl)
          elif "{enumerate}" in ctl:
              w = w + convertenum(ctl)
          elif ctl[0:5] == "\\item":
              w = w + "<li>"
          elif ctl[0:6] == "\\nitem":
              # Item with an explicit label: emit the label after <li>.
              lb = ctl[7:].replace("{", "")
              lb = lb.replace("}", "")
              w = w + "<li>" + lb
          elif "\\hrefnosnap" in ctl:
              w = w + converturlnosnap(ctl)
          elif "\\href" in ctl:
              w = w + converturl(ctl)
          elif "{proof}" in ctl:
              w = w + convertproof(ctl)
          elif "\\subsection" in ctl:
              w = w + convertsubsection(ctl)
          elif "\\section" in ctl:
              w = w + convertsection(ctl)
          elif "\\label" in ctl:
              w = w + convertlab(ctl)
          elif "\\image" in ctl:
              w = w + convertimage(ctl)
          elif "\\sout" in ctl:
              w = w + convertstrike(ctl)
          elif "\\begin" in ctl and "{center}" in ctl:
              w = w + "<p align=center>"
          elif "\\end" in ctl and "{center}" in ctl:
              w = w + "</p>"
          else:
              # Colour environments and theorem-like environments; any
              # other environment produces no output.
              for clr in colorchoice:
                  if ("{" + clr + "}") in ctl:
                      w = w + convertcolors(ctl, clr)
              for thm in ThmEnvs:
                  if ctl == "\\end{" + thm + "}":
                      w = w + convertendthm(thm)
                  elif ctl == "\\begin{" + thm + "}":
                      w = w + convertbeginthm(thm)
                  elif ("\\nbegin{" + thm + "}") in ctl:
                      L = cb.split(ctl)
                      thname = L[3]
                      w = w + convertbeginnamedthm(thname, thm)
          w += ttext[i+1]

      return processfontstyle(w)
  def processfontstyle(w) :
      """Replace font-style groups in `w` by HTML tags.

      `fontstyle` maps an opening marker to an HTML tag name.  Whenever
      one of the markers is found at the current position it is replaced
      by "<tag>", and the closing brace that ends that group is replaced
      by "</tag>".  Ordinary brace groups are tracked as well, so nested
      braces are matched correctly, and are copied through unchanged.  A
      closing brace with no open group is copied as is.

      Returns the converted string.
      """
      closers = []        # closing text owed for each group currently open
      ww = ""
      i = 0
      while i < len(w):
          special = False
          for k, v in fontstyle.items():
              l = len(k)
              if w[i:i+l] == k:
                  ww += '<' + v + '>'
                  closers.append('</' + v + '>')
                  i += l
                  special = True
          if not special:
              if w[i] == '{':
                  ww += '{'
                  closers.append('}')
              elif w[i] == '}' and closers:
                  ww += closers.pop()
              else:
                  ww += w[i]
              i += 1
      return ww
  def convertref(m) :
      """Replace every ref / eqref command in `m` by a clickable number.

      The label is the brace argument with colons removed.  The link
      points at the anchor of that name and shows the number stored in
      `ref`; eqref numbers are wrapped in parentheses.  A label that was
      never defined is shown as "??" instead of aborting the conversion
      with a KeyError.

      Returns the converted string.
      """
      p = re.compile("\\\\ref\\s*\\{.*?}|\\\\eqref\\s*\\{.*?}")

      def _link(match):
          t = match.group(0)
          lab = cb.split(t)[1]
          lab = lab.replace(":", "")
          number = str(ref.get(lab, "??"))
          if "\\eqref" in t:
              number = "(" + number + ")"
          return "<a href=\"#" + lab + "\">" + number + "</a>"

      return p.sub(_link, m)
  # The program makes several passes through the input.
  #
  # In a first clean-up, all text before \begin{document}
  # and after \end{document}, if present, is removed,
  # all double-returns are converted
  # to <p>, and all remaining returns are converted to
  # spaces.
  #
  # The second step implements a few simple macros. The user can
  # add support for more macros if desired by editing the
  # convertmacros() procedure.
  #
  # Then the program separates the mathematical
  # from the text parts. (It assumes that the document does
  # not start with a mathematical expression.)
  #
  # It makes one pass through the text part, translating
  # environments such as theorem, lemma, proof, enumerate, itemize,
  # \em, and \bf. Along the way, it keeps counters for the current
  # section and subsection and for the current numbered theorem-like
  # environment, as well as a flag that tells whether one is
  # inside a theorem-like environment or not. Every time a \label{xx}
  # command is encountered, we give ref[xx] the value of the section
  # in which the command appears, or the number of the theorem-like
  # environment in which it appears (if applicable). Each appearance
  # of \label is replaced by an html "name" tag, so that later we can
  # replace \ref commands by clickable html links.
  #
  # The next step is to make a pass through the mathematical environments.
  # Displayed equations are numbered and centered, and when a \label{xx}
  # command is encountered we give ref[xx] the number of the current
  # equation.
  #
  # A final pass replaces \ref{xx} commands by the number in ref[xx],
  # and a clickable link to the referenced location.
  # Command line: [inputfile [outputfile]].  Without arguments the script
  # converts wpress.tex into wpress.html.
  inputfile = "wpress.tex"
  outputfile = "wpress.html"
  if len(argv) > 1:
      inputfile = argv[1]
      if len(argv) > 2:
          outputfile = argv[2]
      elif inputfile.endswith(".tex"):
          outputfile = inputfile[:-len(".tex")] + ".html"
      else:
          # No .tex suffix: append .html so the output can never be the
          # input file itself.
          outputfile = inputfile + ".html"

  with open(inputfile) as f:
      s = f.read()

  # extractbody() takes the text between \begin{document} and
  # \end{document}, if present (otherwise it keeps the whole document),
  # normalizes the spacing, and removes comments.
  s = extractbody(s)

  # formats tables
  s = converttables(s)

  # reformats optional parameters passed in square brackets
  s = convertsqb(s)

  # implements simple macros
  s = convertmacros(s)

  # extracts the math parts and replaces them with placeholders,
  # processes math and text separately, then puts the processed
  # math equations in place of the placeholders
  (math, text) = separatemath(s)

  s = text[0]
  for i in range(len(math)):
      s = s + "__math" + str(i) + "__" + text[i+1]

  s = processtext(s)
  math = processmath(math)

  # converts escape sequences such as \$ to HTML codes
  # This must be done after formatting the tables or the '&' in
  # the HTML codes will create problems
  for e in esc:
      s = s.replace(e[1], e[2])
      for i in range(len(math)):
          math[i] = math[i].replace(e[1], e[3])

  # puts the math equations back into the text
  for i in range(len(math)):
      s = s.replace("__math" + str(i) + "__", math[i])

  # translating the \ref{} commands
  s = convertref(s)

  if HTML:
      # Standalone page: the opening <html> matches the closing tag that
      # was already being written.
      s = "<html><head><style>body{max-width:55em;}a:link{color:#4444aa;}a:visited{color:#4444aa;}a:hover{background-color:#aaaaFF;}</style></head><body>" + s + "</body></html>"

  s = s.replace("<p>", "\n<p>\n")

  with open(outputfile, "w") as f:
      f.write(s)