PubmedSpreadsheet Generation Code
From GersteinInfo
(Difference between revisions)
												
			
		|  (Created page with 'parse_pmids.py:    <code>  #!/usr/bin/python  import os, sys  from GoogleSpreadsheet import GoogleSpreadsheet  from datetime import datetime   master_spreadsheet_id = "thsIyYg12E…') | |||
| Line 5: | Line 5: | ||
|   import os, sys |   import os, sys | ||
|   from GoogleSpreadsheet import GoogleSpreadsheet |   from GoogleSpreadsheet import GoogleSpreadsheet | ||
| - |   from datetime import datetime | + |   from datetime import datetime<br \> | 
| - | + | ||
|   master_spreadsheet_id = "thsIyYg12E8Px0zGJQsAopg" |   master_spreadsheet_id = "thsIyYg12E8Px0zGJQsAopg" | ||
|   worksheet_id = "od6" |   worksheet_id = "od6" | ||
| - |   master_spreadsheet = GoogleSpreadsheet(master_spreadsheet_id, worksheet_id) | + |   master_spreadsheet = GoogleSpreadsheet(master_spreadsheet_id, worksheet_id)<br \> | 
| - | + | ||
|   ncbiquery = "/home/mpw6/new_papers/ncbiquery.txt" |   ncbiquery = "/home/mpw6/new_papers/ncbiquery.txt" | ||
| - |   ncbiFile = open(ncbiquery,'w') | + |   ncbiFile = open(ncbiquery,'w')<br \> | 
| - | + | ||
|   def buildQuery(master_spreadsheet, ncbiFile): |   def buildQuery(master_spreadsheet, ncbiFile): | ||
|          start = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=" |          start = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=" | ||
| Line 23: | Line 20: | ||
|          pmids = pmids[:-1] |          pmids = pmids[:-1] | ||
|          out = start + pmids + end + '\n' |          out = start + pmids + end + '\n' | ||
| - |          ncbiFile.write(out) | + |          ncbiFile.write(out)<br \> | 
| - | + | ||
|   buildQuery(master_spreadsheet, ncbiFile) |   buildQuery(master_spreadsheet, ncbiFile) | ||
|   </code> |   </code> | ||
Revision as of 11:03, 16 September 2011
parse_pmids.py:
#!/usr/bin/python
import os, sys
from GoogleSpreadsheet import GoogleSpreadsheet
from datetime import datetime
# Identifiers handed to GoogleSpreadsheet below; presumably the spreadsheet
# key and the worksheet id within it -- TODO confirm against GoogleSpreadsheet.
master_spreadsheet_id = "thsIyYg12E8Px0zGJQsAopg"
worksheet_id = "od6"
# Spreadsheet handle; buildQuery iterates over it and reads row['pmid'].
master_spreadsheet = GoogleSpreadsheet(master_spreadsheet_id, worksheet_id)
# Output file for the generated efetch URL. Opened in 'w' mode, so any
# existing contents are truncated as soon as this line runs.
ncbiquery = "/home/mpw6/new_papers/ncbiquery.txt"
ncbiFile = open(ncbiquery,'w')
def buildQuery(master_spreadsheet, ncbiFile):
    """Write one PubMed efetch URL covering every PMID in the spreadsheet.

    Args:
        master_spreadsheet: Iterable of rows, each indexable by the key
            'pmid'. Rows whose 'pmid' value is falsy are skipped.
        ncbiFile: Object with a write() method; receives the URL line.

    The line written is the efetch prefix, the comma-separated PMIDs, the
    rettype/retmode suffix and a trailing newline. If no row carries a
    PMID, the URL is still written with an empty id list.
    """
    # NOTE(review): NCBI E-utilities may now require https -- confirm
    # before relying on this URL.
    start = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="
    end = "&rettype=xml&retmode=file"
    # Leading apostrophes are stripped from each value; presumably the cells
    # are stored as "'12345" to force text formatting -- verify with the sheet.
    pmids = ','.join(
        row['pmid'].lstrip('\'')
        for row in master_spreadsheet
        if row['pmid']
    )
    ncbiFile.write(start + pmids + end + '\n')
# Build the query file, then close the output handle so the URL is flushed
# to disk even if buildQuery raises part-way through.
try:
    buildQuery(master_spreadsheet, ncbiFile)
finally:
    ncbiFile.close()
