TCGA临床数据提取perl脚本
从TCGA获得临床数据后，使用Perl获得临床矩阵
# Load the parser that the per-file code below calls (XML::Simple->new / XMLin).
require XML::Simple;

# Every entry of the current directory; non-directories are filtered out with -d below.
my @dirs=glob("*");
# Broken-down local time at start-up; element 5 is the year minus 1900.
my @samp1e=(localtime(time));
# Output table. The three-argument form keeps the file name separate from the
# mode; the bareword handle WF is kept so existing "print WF" statements still work.
open(WF, '>', 'clinical.xls') or die "Cannot open clinical.xls for writing: $!";
# Header row of the tab-separated output.
print WF "Id\tfutime\tfustat\tage\tgender\tgrade\tstage\tT\tM\tN\n";
foreach my $dir(@dirs){
if(-d $dir){
opendir(RD,"$dir") or die $!;
while(my $xmlfile=readdir(RD)){
if($xmlfile=~/\.xml$/){
#print "$dir\\$xmlfile\n";
# Parse one clinical XML file into a nested hash structure.
my $urxs = XML::Simple->new(KeyAttr => "name");
# Join directory and file name with "/", which Perl accepts on Windows as well
# as on Unix-like systems.
my $urxml = $urxs->XMLin("$dir/$xmlfile");
# The disease code (stored under admin:disease_code) is lower-cased and used as
# the namespace prefix of the patient and follow-up elements.
my $dia_code=$urxml->{'admin:admin'}{'admin:disease_code'}{'content'};
my $dia_code_lc=lc($dia_code);
my $patient_key=$dia_code_lc . ':patient';       # e.g. ucec:patient
my $follow_key=$dia_code_lc . ':follow_ups';
my $patient_barcode=$urxml->{$patient_key}{'shared:bcr_patient_barcode'}{'content'};   # e.g. TCGA-AX-A1CJ
# Debug aid: print the file name that holds this specific patient.
if($patient_barcode eq "TCGA-AA-3521"){
    print "$xmlfile\n";
}
# Clinical fields read from the patient record. Each value is the 'content'
# entry of the corresponding element, or undef when the element is absent.
my ($gender, $age, $race, $grade);
my ($clinical_stage, $clinical_T, $clinical_M, $clinical_N);
my ($pathologic_stage, $pathologic_T, $pathologic_M, $pathologic_N);
{
    # Short-lived references to the shared parts of the lookup paths; scoped to
    # this bare block so they do not leak into the rest of the loop body.
    my $patient     = $urxml->{$patient_key};
    my $stage_event = $patient->{'shared_stage:stage_event'};
    my $tnm         = $stage_event->{'shared_stage:tnm_categories'};
    my $clinical    = $tnm->{'shared_stage:clinical_categories'};
    my $pathologic  = $tnm->{'shared_stage:pathologic_categories'};

    $gender = $patient->{'shared:gender'}{'content'};                                    # male/female
    $age    = $patient->{'clin_shared:age_at_initial_pathologic_diagnosis'}{'content'};
    $race   = $patient->{'clin_shared:race_list'}{'clin_shared:race'}{'content'};        # white/black
    $grade  = $patient->{'shared:neoplasm_histologic_grade'}{'content'};                 # G1/G2/G3

    $clinical_stage = $stage_event->{'shared_stage:clinical_stage'}{'content'};          # stage I
    $clinical_T     = $clinical->{'shared_stage:clinical_T'}{'content'};
    $clinical_M     = $clinical->{'shared_stage:clinical_M'}{'content'};
    $clinical_N     = $clinical->{'shared_stage:clinical_N'}{'content'};

    $pathologic_stage = $stage_event->{'shared_stage:pathologic_stage'}{'content'};      # stage I
    $pathologic_T     = $pathologic->{'shared_stage:pathologic_T'}{'content'};
    $pathologic_M     = $pathologic->{'shared_stage:pathologic_M'}{'content'};
    $pathologic_N     = $pathologic->{'shared_stage:pathologic_N'}{'content'};
}

# Replace every missing value with the placeholder "unknow". The loop variable
# aliases each scalar, so the assignment updates the variable itself.
for my $field ($gender, $age, $race, $grade,
               $clinical_stage, $clinical_T, $clinical_M, $clinical_N,
               $pathologic_stage, $pathologic_T, $pathologic_M, $pathologic_N){
    $field = "unknow" unless defined $field;
}
# Baseline survival from the patient record, stored as "<days>\t<status>":
# days to last follow-up with status 0 when the patient is recorded as Alive,
# otherwise days to death with status 1.
my $survivalTime="";
my $vital_status=$urxml->{$patient_key}{'clin_shared:vital_status'}{'content'};
my $followup=$urxml->{$patient_key}{'clin_shared:days_to_last_followup'}{'content'};
my $death=$urxml->{$patient_key}{'clin_shared:days_to_death'}{'content'};
if($vital_status eq 'Alive'){
    $survivalTime="$followup\t0";
}
else{
    $survivalTime="$death\t1";
}
for my $i(keys %{$urxml->{$patient_key}{$follow_key}}){
my @survivalArr=split(/\t/,$survivalTime);
eval{
$followup=$urxml->{$patient_key}{$follow_key}{$i}{'clin_shared:days_to_last_followup'}{'content'};
$vital_status=$urxml->{$patient_key}{$follow_key}{$i}{'clin_shared:vital_status'}{'content'};
$death=$urxml->{$patient_key}{$follow_key}{$i}{'clin_shared:days_to_death'}{'content'};
};
if($@){
$followup=$urxml->{$patient_key}{$follow_key}{$i}[0]{'clin_shared:days_to_last_followup'}{'content'};
$vital_status=$urxml->{$patient_key}{$follow_key}{$i}[0]{'clin_shared:vital_status'}{'content'};
$death=$urxml->{$patient_key}{$follow_key}{$i}[0]{'clin_shared:days_to_death'}{'content'};
}