Library Guides
Importing an Excel file
Syntax
proc import
datafile = 'file path\filename.xlsx'
out = new_dataset
dbms = xlsx replace;
run;
Example
proc import
datafile = 'c:\temp\sample.xlsx'
out = sample
dbms = xlsx replace;
run;
Importing an SPSS file
Syntax
proc import
datafile = 'file path\filename.sav'
out = new_dataset
dbms = SAV replace;
run;
Example
proc import
datafile = 'c:\temp\sample.sav'
out = sample
dbms = SAV replace;
run;
Importing a CSV file
Syntax
proc import
datafile = 'file path\filename.csv'
out = new_dataset
dbms = CSV replace;
run;
Example
proc import
datafile = 'c:\temp\sample.csv'
out = sample
dbms = CSV replace;
run;
Viewing dataset contents (variable names and attributes)
Syntax
proc contents data = dataset;
run;
Example
proc contents data = sample;
run;
Creating a new variable by copying an existing variable
Syntax
var_new = var_old;
Example
data sample_new;
set sample;
age_new = age;
run;
Creating a new variable with a constant value
Syntax
var_new = constant_number;
Example
data sample_new;
set sample;
group = 1;
run;
Creating new variables by arithmetic calculations
Arithmetic Operators
Symbol | Definition |
---|---|
** | exponentiation |
* | multiplication |
/ | division |
+ | addition |
- | subtraction |
Example
data calculation;
set sample;
ID_new = ID + 1;
age_new = age - 2;
point = total*5;
weight_kg = weight_lb/2.205;
bmi = (weight / (height*height) ) * 703;
run;
Using if-then statements
Comparison and Logical Operators
Definition | Symbol | Alternative |
---|---|---|
equal to | = | EQ |
not equal to | ^= | NE |
greater than | > | GT |
less than | < | LT |
greater than or equal to | >= | GE |
less than or equal to | <= | LE |
equal to one of a list | in | IN |
logical and | & | AND |
logical or | \| | OR |
logical not | ^ | NOT |
Example
data sample_new;
set sample;
if age >= 18 then adult = 1;
if age < 18 then adult = 0;
if age >= 18 & gender = 1 then group = 1;
run;
Sorting data in ascending order
Syntax
proc sort data = dataset_old
out = dataset_new;
by var_name;
run;
Example
proc sort data = sample
out = sample_ascending;
by age;
run;
Sorting data in descending order
Syntax
proc sort data = dataset_old
out = dataset_new;
by descending var_name;
run;
Example
proc sort data = sample
out = sample_descending;
by descending age;
run;
Subsetting data
Example
data sample_new;
set sample;
keep number ID age;
run;
data sample_new;
set sample;
if ID = . then delete;
run;
data sample_new;
set sample;
if age >= 18 & group = 1 then delete;
run;
Merging data
Syntax
data dataset_new;
merge dataset1 dataset2;
by var;
run;
Example
data sample_new;
merge sample1 sample2;
by id;
run;
Stacking data
Syntax
data dataset_new;
set dataset1 dataset2;
run;
Example
data sample_new;
set sample1 sample2;
run;
Printing variables
Syntax
proc print data = dataset;
var var_name1 var_name2 var_name3;
run;
Example
proc print data = sample;
var number age id;
run;
Summarizing continuous variables
Syntax
proc means data = dataset_name;
var var_name;
class var_name;
run;
Example 1
proc means data = sample mean median std min max maxdec = 2;
run;
Example 2
proc means data = sample N mean median std min max maxdec = 2;
var age;
class gender;
run;
Summarizing categorical variables
Syntax
proc freq data = dataset_name;
tables var_name1 var_name2 var_name3;
run;
Example 1
proc freq data = sample;
tables race;
run;
Example 2
proc freq data = sample order = freq;
tables race;
run;
Pearson Correlation
Syntax
proc corr data = dataset_name;
var var_name1 var_name2 var_name3;
run;
Example
proc corr data = sample;
var math science read;
run;
Independent t-test
Syntax
proc ttest data = dataset_name;
var var_name1;   /* outcome variable */
class var_name2; /* independent (grouping) variable */
run;
Example
proc ttest data = sample;
var write;
class gender;
run;
Chi-Square Test
Syntax
proc freq data = dataset_name;
tables var_row*var_column /chisq;
run;
Example
proc freq data = sample;
tables sports*location /chisq;
run;
One Way ANOVA
Syntax
proc anova data = dataset_name;
class var_name2;
model var_name1 = var_name2;
run;
Example
proc anova data = sample;
class school;
model math = school;
run;
Linear Regression
Syntax
proc reg data = dataset_name;
title "analysis_title";
model outcome_var = independent_var;
run;
Example 1 (simple linear regression)
proc reg data = sample;
title "example 1";
model bmi = weight;
run;
Example 2 (multiple linear regression)
proc reg data = sample;
title "example 2";
model bmi = weight height age;
run;