# transform a raw data file into a happy proto data frame
#
import csv
# Verbs that may occur in an item's sentence.  Each sentence is expected to
# contain exactly one of them (see _find_verb).
verbs = [
    'mentioned',
    'indicated',
    'proposed',
    'suggested',
    'recommended',
    'confessed',
    'stated',
    'announced',
    'muttered',
    'explained',
]

in_name = 'transposed.csv'
out_name = 'reshaped.csv'


def _split_shift_length(code):
    """Split a condition code such as 'N3' into ('N', '3').

    Return None when the code is empty or does not start with 'N' or 'S',
    so the caller can skip the row instead of reusing stale values.
    """
    if code[:1] in ('N', 'S'):
        return code[0], code[1:]
    return None


def _find_verb(sentence):
    """Return the first entry of ``verbs`` found in ``sentence``, else ''."""
    # we're in trouble if one of the sentences contains more than one verb
    # fortunately, they don't.
    for v in verbs:
        if v in sentence:
            return v
    return ''


def reshape(in_path=in_name, out_path=out_name, n_subjects=192):
    """Reshape the wide CSV at ``in_path`` into a long CSV at ``out_path``.

    The first input row is the header.  It must contain the columns 'V1'
    (a condition code: 'N' or 'S' followed by the length), 'ResponseID'
    (the text searched for one of ``verbs``) and 'ID.1' .. 'ID.<n_subjects>'
    (one score column per subject).

    For every data row and every subject whose cell consists of digits only,
    one output row ``subject, score, shift, length, verb`` is written.

    Rows are skipped when they are too short to hold the 'V1' and
    'ResponseID' cells, or when the 'V1' code is empty or does not start
    with 'N' or 'S'.  If no verb is found, the verb field is left empty.

    Raises:
        ValueError: if a required column is missing from the header.
        StopIteration: if the input file has no header row.
    """
    with open(in_path) as f, open(out_path, 'w') as out:
        hnps = csv.reader(f, delimiter=',')
        data = csv.writer(out, delimiter=',')

        header = next(hnps)
        # Resolve every column position once instead of once per cell.
        code_col = header.index('V1')
        sentence_col = header.index('ResponseID')
        subject_cols = [(s, header.index('ID.' + str(s)))
                        for s in range(1, n_subjects + 1)]

        data.writerow(['subject', 'score', 'shift', 'length', 'verb'])

        for line in hnps:
            if len(line) <= max(code_col, sentence_col):
                # blank or truncated row: nothing usable in it
                continue

            parsed = _split_shift_length(line[code_col])
            if parsed is None:
                continue
            shift, length = parsed

            verb = _find_verb(line[sentence_col])

            for s, col in subject_cols:
                if col >= len(line):
                    # that cell is empty for this subject
                    continue
                # if this subject responded to this item add data point to file
                score = line[col]
                if score.isdigit():
                    data.writerow([s, score, shift, length, verb])


if __name__ == '__main__':
    reshape()
Preprocess R data with Python
python
data
(for archival purposes only, probably don’t use this.)
This script can easily be modified to code, recode, or otherwise modify CSV files prior to loading them into R.