###Help in R
help(lm) # get help on the "lm" command
#another way to get help on the "lm" command
?lm
#searches for help on functions related to logarithm
??logarithm

###Creating vectors using c
c(1, 2, 5.3, 6, -2, 4)
c("one", "two", "three")
c(TRUE, FALSE, TRUE)

###Creating vectors using seq

#sequence from 1 to 10 in increments of 1
seq(1, 10)
1:10 #same as seq(1,10)
#sequence of numbers from 1 to 20 in increments of 2
seq(1, 20, by = 2)
#sequence of numbers from 10 to 20 of length 100
seq(10, 20, len = 100)

###Creating vectors using rep
#repeat the vector 1:3 three times
rep(1:3, times = 3)
#trt1 once, trt2 twice, trt3 thrice
rep(c("trt1", "trt2", "trt3"), times = 1:3)
# repeat 1, 2, 3 four times times each
rep(1:3, each = 4)

v0 <- .Last.value; v0 # last output
###Assigning and accessing objects
#assign 1, 2, 3, 4, 5 to the variable v1
v1 <- 1:5
#accessing data stored in variable v1
v1
#create two vectors, then join them together in a new vector
v2 <- c(9, 10, 1)
new <- c(v1, v2); new

### matrices
A<-matrix(data=1:6, nrow=3,ncol=2); A
B<-matrix(data=1:6, nrow=5, ncol=10, byrow=T); B
dim(B) 


###Creating factors
f1 <- factor(rep(1:6,3))
f1
f2 <- factor(c("a",7,"blue", "blue"))
f2
is.factor(f2)

##Commonly Used Functions
x <- rnorm(50, mean=5, sd=1)#Generate random sample (n=50) of Normal(5,1)

str(x)   	# compact overview of object x
length(x) 	# return the length of x
sum(x) 		# sum the numbers in x
mean(x)		# calculate the mean of the numbers in x
var(x) 		# calculate the variance of the numbers in x
sd(x) 		# calculate the standard deviation of x
median(x) 	# calculate the median of x
range(x) 	# calculate the range of x
log(x) 		# calculate the natural log of x
summary(x) 	# return 5-number summary of x

##Functions related to statistical distributions
#returns the probability that a normal random variable with
#mean 0 and standard deviation 1 is less than or equal to 1.96.
pnorm(1.96, mean = 0, sd = 1)
pnorm(1.96) # gives the same result

#	P(Z>1.96)
pnorm(1.96, lower.tail=FALSE)
1-pnorm(1.96) # same result

#returns the value x such that P(X<=x)=0.6 for which a uniform
#random variable on the interval [0, 1]
qunif(0.6, min = 0, max = 1)

#returns the probability that Pr(X=2) for X~Binom(n=20, p = 0.2).
dbinom(2, size = 20, prob = .2)

#returns the density of an exponential random variable with mean = 1/2.
dexp(1, rate = 4)

#returns a sample of 100 observations from a chi-squared random
#variable with 5 df
rchisq(100, df = 5)



### BASIC PLOTTING
#Histograms
x <- rnorm(100, mean = 100, sd = 10)
hist(x, xlab = "x-values",
	main = "Histogram of 100 observations from N(100, 10^2) distribution")

#boxplots
y <- rnorm(100, mean = 80, sd = 3)
boxplot(y)

#parallel boxplots
grp <- factor(rep(c("Grp 1", "Grp 2"), each = 100)) #make groups for x and y
dat <- c(x, y)
boxplot(dat ~ grp, xlab = "Group")

#scatterplots
#generate vectors
x <- runif(20)
y <- 2 + 3 * x + rnorm(20)

#plot x on x-axis and y on y-axis
plot(x, y)

#plot scatterplot and title
plot(x, y, xlab="1st variable", ylab="2nd variable", main="Title of the plot")
abline(2,3,col="blue")


#plot of normal(0,1) density
x <- seq(-4, 4, len = 1000)
y <- dnorm(x, mean = 0, sd = 1)
plot(x, y, xlab="x", ylab="density", type = "l")
title("Density of Standard Normal")


#plot of Binomial(n = 20, p = .3) pmf
x <- 0:20
y <- dbinom(x, size = 20, prob = .3)
plot(x, y, xlab="Number of successes", ylab="Probability", type = "h")
title("Mass Function of Binomial(n = 20, p = .3)")


###Creating data frames
#create a few vectors
d <- c(1, 2, 3, 4)
e <- c("red", "white", "blue", NA)
f <- c(TRUE, TRUE, TRUE, FALSE)
#creates dataframe and assigns it to mydataframe
mydataframe <- data.frame(d,e,f)
mydataframe
#rename columns of data frame
names(mydataframe) <- c("ID", "Color", "Passed")
mydataframe

#name columns while creating data frame
mydataframe2 <- data.frame(ID=d, Color=e, Passed=f); mydataframe2

###Accessing data in dataframes
#access Color column
mydataframef$Color
#access first row
mydataframe[1,]
#access third column
mydataframe[,3]
#access ID column of dataframe2 and store it in newID
newID <- mydataframe2$ID
newID

###Importing data from file
setwd("PFW Teaching//QuickRCourse")
getwd()
data <- read.table("example.txt",
	header = TRUE, sep = "\t")
str(data)
mean(data$Third) 

data2 <- read.table(file = file.choose(), header = TRUE, sep = "\t")
data2

###Accessing vectors using index vector
a <- seq(2, 16, by = 2); a

#access the 2nd, 4th, and 6th elements of a
a[c(2, 4, 6)]
#access elements 3 through 6 of a
a[3:6]
#access all elements in a except the 2nd, 4th, and 6th elements
a[-c(2, 4, 6)]
#access all elements in a except elements 3 through 6
a[-(3:6)]

B <- matrix(data=1:12, nrow=3, ncol=4, byrow=T); B

B[3,4] 	# Access the last entry in the last row of the matrix
B[2,] 	# Second row of the matrix 
median(B[,4]) # What is the median of the forth column?

###Logical index vectors and operators
#values greater than 10
a > 10
#values less than or equal to 4
a <= 4
#values equal to 10
a == 10
#values not equal to 10
a != 10
#values greater than 6 and less than or equal to 10
(a > 6) & (a <= 10)
#values less than or equal to 4 or greater or equal to 12
(a <= 4)|(a >= 12)
#elements of a less than 6
a[a < 6]
#elements of a equal to 10
a[a == 10]
#elements of a less than 6 or equal to 10
a[(a < 6)|(a == 10)]


###Creating functions

#Example of a function that returns the standard deviation of a vector x
stdev <- function(x)
{
	s <- sqrt(sum((x - mean(x))^2)/(length(x) - 1))
	s
}

z <- rnorm(20)
stdev(z)
sd(z)

#Example of a function that returns the density of a N(mu, sigma^2)
#random variable.  I have the vector x, and the mean and standard deviation
#of the normal distribution as arguments.
normal.density <- function(x, mu = 0, sigma = 1)
{
	return(exp(-(x - mu)^2/(2*sigma^2))/sqrt(2*pi*sigma^2))
}

myseq <- seq(-4, 4, len = 100)

#compare built in dnorm function to our function
#notice that our function assumes mu = 0, sigma = 1 if
#these arguments are not suppplied
d1a <- dnorm(myseq, mean = 0, sd = 1)
d1b <- normal.density(myseq)
#compare results
range(d1a - d1b)

d2a <- dnorm(myseq, mean = 1, sd = 5)
d2b <- normal.density(myseq, mu = 1, sigma = 5)
range(d2a - d2b)

#Example of a function that returns the mean and standard deviation of a vector.
#The mean and standard deviation are return as a list.

ms <- function(x)
{
	m <- mean(x)
	s <- sd(x)
	return(list(m = m, s = s))
}

y <- 1:10
rslt <- ms(y); rslt
rslt$m-mean(y) #should give 0

# Packages
install.packages ("faraway")
library(faraway) 
ls("package:faraway")

# Pima example. Output is already in the lecture notes

require(faraway)# load package with the dataset only if is not already loaded
data(pima) 		# load data
head(pima) 		# first six rows of pima	
summary(pima)
sort(pima$diastolic)

# replace 0 for NA (missing value)

pima$diastolic[pima$diastolic == 0]  <- NA
pima$glucose[pima$glucose == 0] <- NA
pima$triceps[pima$triceps == 0]  <- NA
pima$insulin[pima$insulin == 0] <- NA
pima$bmi[pima$bmi == 0] <- NA

pima$test <- factor(pima$test)
summary(pima$test)

levels(pima$test) <-c("negative","positive") # more descriptive labels

hist(pima$dias, xlab="Diastolic")
with(pima, hist(diastolic)) # alternative 1

attach(pima) # alternative 2
hist(diastolic)
detach(pima)

plot(sort(pima$diastolic),ylab="Sorted Diastolic")

plot(density(pima$diastolic,na.rm=TRUE), main="")

plot(diabetes ~ diastolic, data = pima) # this is the 4th way to specify pima data
plot(diabetes ~ test, data = pima)



# QQplot2 
if(!require("ggplot2")install.packages("ggplot2")
library(ggplot2)

ggpimax  = ggplot(pima, aes(x=diastolic))
ggpimaxy = ggplot(pima, aes(x=diastolic, y=diabetes))
ggpimax + geom_histogram()
ggpimax + geom_density()
ggpimaxy + geom_point()
ggplot(pima,aes(x=diastolic,y=diabetes,shape=test)) + 
  geom_point() + 
  theme(legend.position = "top", legend.direction = "horizontal")
ggpimaxy + geom_point(size=1) + facet_grid(~ test)