###Help in R help(lm) # get help on the "lm" command #another way to get help on the "lm" command ?lm #searches for help on functions related to logarithm ??logarithm ###Creating vectors using c c(1, 2, 5.3, 6, -2, 4) c("one", "two", "three") c(TRUE, FALSE, TRUE) ###Creating vectors using seq #sequence from 1 to 10 in increments of 1 seq(1, 10) 1:10 #same as seq(1,10) #sequence of numbers from 1 to 20 in increments of 2 seq(1, 20, by = 2) #sequence of numbers from 10 to 20 of length 100 seq(10, 20, len = 100) ###Creating vectors using rep #repeat the vector 1:3 three times rep(1:3, times = 3) #trt1 once, trt2 twice, trt3 thrice rep(c("trt1", "trt2", "trt3"), times = 1:3) # repeat 1, 2, 3 four times times each rep(1:3, each = 4) v0 <- .Last.value; v0 # last output ###Assigning and accessing objects #assign 1, 2, 3, 4, 5 to the variable v1 v1 <- 1:5 #accessing data stored in variable v1 v1 #create two vectors, then join them together in a new vector v2 <- c(9, 10, 1) new <- c(v1, v2); new ### matrices A<-matrix(data=1:6, nrow=3,ncol=2); A B<-matrix(data=1:6, nrow=5, ncol=10, byrow=T); B dim(B) ###Creating factors f1 <- factor(rep(1:6,3)) f1 f2 <- factor(c("a",7,"blue", "blue")) f2 is.factor(f2) ##Commonly Used Functions x <- rnorm(50, mean=5, sd=1)#Generate random sample (n=50) of Normal(5,1) str(x) # compact overview of object x length(x) # return the length of x sum(x) # sum the numbers in x mean(x) # calculate the mean of the numbers in x var(x) # calculate the variance of the numbers in x sd(x) # calculate the standard deviation of x median(x) # calculate the median of x range(x) # calculate the range of x log(x) # calculate the natural log of x summary(x) # return 5-number summary of x ##Functions related to statistical distributions #returns the probability that a normal random variable with #mean 0 and standard deviation 1 is less than or equal to 1.96. pnorm(1.96, mean = 0, sd = 1) pnorm(1.96) # gives the same result # P(Z>1.96) pnorm(1.96, lower.tail=FALSE) 1-pnorm(1.96) # same result #returns the value x such that P(X<=x)=0.6 for which a uniform #random variable on the interval [0, 1] qunif(0.6, min = 0, max = 1) #returns the probability that Pr(X=2) for X~Binom(n=20, p = 0.2). dbinom(2, size = 20, prob = .2) #returns the density of an exponential random variable with mean = 1/2. dexp(1, rate = 4) #returns a sample of 100 observations from a chi-squared random #variable with 5 df rchisq(100, df = 5) ### BASIC PLOTTING #Histograms x <- rnorm(100, mean = 100, sd = 10) hist(x, xlab = "x-values", main = "Histogram of 100 observations from N(100, 10^2) distribution") #boxplots y <- rnorm(100, mean = 80, sd = 3) boxplot(y) #parallel boxplots grp <- factor(rep(c("Grp 1", "Grp 2"), each = 100)) #make groups for x and y dat <- c(x, y) boxplot(dat ~ grp, xlab = "Group") #scatterplots #generate vectors x <- runif(20) y <- 2 + 3 * x + rnorm(20) #plot x on x-axis and y on y-axis plot(x, y) #plot scatterplot and title plot(x, y, xlab="1st variable", ylab="2nd variable", main="Title of the plot") abline(2,3,col="blue") #plot of normal(0,1) density x <- seq(-4, 4, len = 1000) y <- dnorm(x, mean = 0, sd = 1) plot(x, y, xlab="x", ylab="density", type = "l") title("Density of Standard Normal") #plot of Binomial(n = 20, p = .3) pmf x <- 0:20 y <- dbinom(x, size = 20, prob = .3) plot(x, y, xlab="Number of successes", ylab="Probability", type = "h") title("Mass Function of Binomial(n = 20, p = .3)") ###Creating data frames #create a few vectors d <- c(1, 2, 3, 4) e <- c("red", "white", "blue", NA) f <- c(TRUE, TRUE, TRUE, FALSE) #creates dataframe and assigns it to mydataframe mydataframe <- data.frame(d,e,f) mydataframe #rename columns of data frame names(mydataframe) <- c("ID", "Color", "Passed") mydataframe #name columns while creating data frame mydataframe2 <- data.frame(ID=d, Color=e, Passed=f); mydataframe2 ###Accessing data in dataframes #access Color column mydataframef$Color #access first row mydataframe[1,] #access third column mydataframe[,3] #access ID column of dataframe2 and store it in newID newID <- mydataframe2$ID newID ###Importing data from file setwd("PFW Teaching//QuickRCourse") getwd() data <- read.table("example.txt", header = TRUE, sep = "\t") str(data) mean(data$Third) data2 <- read.table(file = file.choose(), header = TRUE, sep = "\t") data2 ###Accessing vectors using index vector a <- seq(2, 16, by = 2); a #access the 2nd, 4th, and 6th elements of a a[c(2, 4, 6)] #access elements 3 through 6 of a a[3:6] #access all elements in a except the 2nd, 4th, and 6th elements a[-c(2, 4, 6)] #access all elements in a except elements 3 through 6 a[-(3:6)] B <- matrix(data=1:12, nrow=3, ncol=4, byrow=T); B B[3,4] # Access the last entry in the last row of the matrix B[2,] # Second row of the matrix median(B[,4]) # What is the median of the forth column? ###Logical index vectors and operators #values greater than 10 a > 10 #values less than or equal to 4 a <= 4 #values equal to 10 a == 10 #values not equal to 10 a != 10 #values greater than 6 and less than or equal to 10 (a > 6) & (a <= 10) #values less than or equal to 4 or greater or equal to 12 (a <= 4)|(a >= 12) #elements of a less than 6 a[a < 6] #elements of a equal to 10 a[a == 10] #elements of a less than 6 or equal to 10 a[(a < 6)|(a == 10)] ###Creating functions #Example of a function that returns the standard deviation of a vector x stdev <- function(x) { s <- sqrt(sum((x - mean(x))^2)/(length(x) - 1)) s } z <- rnorm(20) stdev(z) sd(z) #Example of a function that returns the density of a N(mu, sigma^2) #random variable. I have the vector x, and the mean and standard deviation #of the normal distribution as arguments. normal.density <- function(x, mu = 0, sigma = 1) { return(exp(-(x - mu)^2/(2*sigma^2))/sqrt(2*pi*sigma^2)) } myseq <- seq(-4, 4, len = 100) #compare built in dnorm function to our function #notice that our function assumes mu = 0, sigma = 1 if #these arguments are not suppplied d1a <- dnorm(myseq, mean = 0, sd = 1) d1b <- normal.density(myseq) #compare results range(d1a - d1b) d2a <- dnorm(myseq, mean = 1, sd = 5) d2b <- normal.density(myseq, mu = 1, sigma = 5) range(d2a - d2b) #Example of a function that returns the mean and standard deviation of a vector. #The mean and standard deviation are return as a list. ms <- function(x) { m <- mean(x) s <- sd(x) return(list(m = m, s = s)) } y <- 1:10 rslt <- ms(y); rslt rslt$m-mean(y) #should give 0 # Packages install.packages ("faraway") library(faraway) ls("package:faraway") # Pima example. Output is already in the lecture notes require(faraway)# load package with the dataset only if is not already loaded data(pima) # load data head(pima) # first six rows of pima summary(pima) sort(pima$diastolic) # replace 0 for NA (missing value) pima$diastolic[pima$diastolic == 0] <- NA pima$glucose[pima$glucose == 0] <- NA pima$triceps[pima$triceps == 0] <- NA pima$insulin[pima$insulin == 0] <- NA pima$bmi[pima$bmi == 0] <- NA pima$test <- factor(pima$test) summary(pima$test) levels(pima$test) <-c("negative","positive") # more descriptive labels hist(pima$dias, xlab="Diastolic") with(pima, hist(diastolic)) # alternative 1 attach(pima) # alternative 2 hist(diastolic) detach(pima) plot(sort(pima$diastolic),ylab="Sorted Diastolic") plot(density(pima$diastolic,na.rm=TRUE), main="") plot(diabetes ~ diastolic, data = pima) # this is the 4th way to specify pima data plot(diabetes ~ test, data = pima) # QQplot2 if(!require("ggplot2")install.packages("ggplot2") library(ggplot2) ggpimax = ggplot(pima, aes(x=diastolic)) ggpimaxy = ggplot(pima, aes(x=diastolic, y=diabetes)) ggpimax + geom_histogram() ggpimax + geom_density() ggpimaxy + geom_point() ggplot(pima,aes(x=diastolic,y=diabetes,shape=test)) + geom_point() + theme(legend.position = "top", legend.direction = "horizontal") ggpimaxy + geom_point(size=1) + facet_grid(~ test)