/* R for SAS users: complete SAS code from lesson
Quentin Read 2022-01-31 */
/* Import data from URL */
filename csvFile url "https://usda-ree-ars.github.io/SEAStats/R_for_SAS_users/datasets/NASS_soybean.csv";
proc import datafile = csvFile out = nass_soybeans replace dbms = csv; guessingrows = 2000; run;
/* Examine data */
proc print data = nass_soybeans(obs = 10); run;
ods select variables;
proc contents data = nass_soybeans; run;
ods select default;
/* Subset data */
data se_soybeans; set nass_soybeans;
where state in ('Alabama', 'Arkansas', 'Florida', 'Georgia', 'Louisiana', 'Mississippi', 'North Carolina', 'South Carolina', 'Tennessee');
run;
/* Calculate a new column */
data se_soybeans; set se_soybeans;
total_yield = acres * yield;
run;
data se_soybeans; set nass_soybeans;
where state in ('Alabama', 'Arkansas', 'Florida', 'Georgia', 'Louisiana', 'Mississippi', 'North Carolina', 'South Carolina', 'Tennessee');
total_yield = acres * yield;
run;
/* Sort data */
proc sort data = se_soybeans;
by year state;
run;
/* Reshape data */
proc transpose data = se_soybeans out = total_yield_wide;
by year; id state; var total_yield;
run;
proc transpose data = total_yield_wide out = total_yield_long;
by year;
run;
/* Exploratory plots: single panel and multi-panel */
proc sgplot data = se_soybeans;
where state in ('Arkansas', 'Tennessee', 'North Carolina', 'Georgia');
series x = year y = yield / group = state;
yaxis label='yield (bu/ac)';
run;
proc sgpanel data=se_soybeans;
where state in ('Arkansas', 'Tennessee', 'North Carolina', 'Georgia');
panelby state;
series x = year y = yield;
rowaxis label = 'yield (bu/ac)';
run;
/* Table of summary statistics */
proc sql;
select
year,
sum(acres) as grand_total_acres,
sum(total_yield) as grand_total_yield,
sum(yield * acres) / sum(acres) as mean_yield
from se_soybeans
where mod(year, 10) = 0
group by year;
quit;
/* Simple linear regression */
proc reg data = se_soybeans;
model yield = year;
run;
/* Linear mixed model with repeated measures error structure */
proc glimmix data = se_soybeans plots = residualpanel;
class state;
model yield = year / solution;
random intercept / subject = state;
run;
/* Plot of modeled trends for individual states, and overall across states, with the raw data superimposed.
This is not included in the lesson document in the interest of space.
We refit the glimmix model with store and output statements, then use proc plm to get the overall trend,
do some manipulation of the data so that we can show the overall trend with a thicker line,
then plot everything with proc sgplot.
*/
proc glimmix data = se_soybeans plots = residualpanel;
class state;
model yield = year / solution;
random intercept / subject = state;
store yield_fit_lmm;
output out = yield_pred pred(blup) = predicted;
run;
data yield_pred_overall;
input state $ year;
datalines;
overall 1924
overall 2011
run;
proc plm restore = yield_fit_lmm;
score data=yield_pred_overall out=yield_pred_overall predicted;
run;
data plot;
set yield_pred yield_pred_overall;
line_thickness = 1;
if state = 'overall' then do; line_thickness = 5; end;
run;
proc sgplot data = plot description = 'SAS plot of observed soybean yield trends and modeled trends for each state';
series x = year y = predicted / group = state thickresp = line_thickness thickmax = 5;
series x = year y = yield / group = state transparency = 0.7;
yaxis label='yield (bu/ac)';
run;
/* Code for exercises */
/* Exercise 1 */
filename csvFile url "https://usda-ree-ars.github.io/SEAStats/R_for_SAS_users/datasets/Edwards_oats.csv";
proc import datafile = csvFile out = oats replace dbms = csv; run;
/* Exercise 2 */
data oats_subset; set oats;
where year = 2001 & gen in ('Belle', 'Blaze', 'Brawn', 'Chaps');
run;
/* Exercise 3 */
proc sql;
select
gen,
avg(yield) as mean_yield,
std(yield) as stdev_yield
from oats_subset
group by gen;
quit;
/* Exercise 4 */
proc sgplot data = oats_subset;
vbox yield / group = gen;
run;
/* Exercise 5 */
proc glm data = oats_subset plots(unpack) = diagnostics;
class gen;
model yield = gen;
run;
/* Exercise 6 */
proc glm data = oats_subset plots(unpack) = diagnostics;
class gen loc;
model yield = gen loc gen*loc;
run;